mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 20:57:06 +01:00
Compare commits
9 Commits
feature/bl
...
bugfix/dmi
Author | SHA1 | Date | |
---|---|---|---|
d0c2c9c71f | |||
c8cafa77ca | |||
a3bcad3804 | |||
5a5b66292b | |||
e63be32ad2 | |||
6aa106d906 | |||
33d59c8869 | |||
a833fd8dbf | |||
e9712bc7fb |
23
.gitignore
vendored
23
.gitignore
vendored
@ -83,7 +83,6 @@ ltmain.sh
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
.dirstamp
|
||||
|
||||
# build directory #
|
||||
###################
|
||||
@ -93,24 +92,28 @@ build*/*
|
||||
#####################
|
||||
*.xcodeproj/*
|
||||
build.sh
|
||||
.vscode
|
||||
*.code-workspace
|
||||
|
||||
# Eigen source #
|
||||
################
|
||||
Grid/Eigen
|
||||
Eigen/*
|
||||
lib/Eigen/*
|
||||
|
||||
# FFTW source #
|
||||
################
|
||||
lib/fftw/*
|
||||
|
||||
# libtool macros #
|
||||
##################
|
||||
m4/lt*
|
||||
m4/libtool.m4
|
||||
|
||||
# github pages #
|
||||
################
|
||||
gh-pages/
|
||||
# Buck files #
|
||||
##############
|
||||
.buck*
|
||||
buck-out
|
||||
BUCK
|
||||
make-bin-BUCK.sh
|
||||
|
||||
# generated sources #
|
||||
#####################
|
||||
Grid/qcd/spin/gamma-gen/*.h
|
||||
Grid/qcd/spin/gamma-gen/*.cc
|
||||
lib/qcd/spin/gamma-gen/*.h
|
||||
lib/qcd/spin/gamma-gen/*.cc
|
101
.travis.yml
101
.travis.yml
@ -7,13 +7,64 @@ cache:
|
||||
matrix:
|
||||
include:
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
osx_image: xcode7.2
|
||||
compiler: clang
|
||||
env: PREC=single
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env: PREC=double
|
||||
- compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.9
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: VERSION=-4.9
|
||||
- compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-5
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: VERSION=-5
|
||||
- compiler: clang
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.8
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||
- compiler: clang
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.8
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||
|
||||
before_install:
|
||||
- export GRIDDIR=`pwd`
|
||||
@ -21,41 +72,35 @@ before_install:
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
||||
|
||||
install:
|
||||
- export CWD=`pwd`
|
||||
- echo $CWD
|
||||
- export CC=$CC$VERSION
|
||||
- export CXX=$CXX$VERSION
|
||||
- echo $PATH
|
||||
- which autoconf
|
||||
- autoconf --version
|
||||
- which automake
|
||||
- automake --version
|
||||
- which $CC
|
||||
- $CC --version
|
||||
- which $CXX
|
||||
- $CXX --version
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
|
||||
|
||||
script:
|
||||
- ./bootstrap.sh
|
||||
- mkdir build
|
||||
- cd build
|
||||
- mkdir lime
|
||||
- cd lime
|
||||
- mkdir build
|
||||
- cd build
|
||||
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
|
||||
- tar xf lime-1.3.2.tar.gz
|
||||
- cd lime-1.3.2
|
||||
- ./configure --prefix=$CWD/build/lime/install
|
||||
- make -j4
|
||||
- make install
|
||||
- cd $CWD/build
|
||||
- ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- make check
|
||||
- ./benchmarks/Benchmark_dwf --threads 1
|
||||
- echo make clean
|
||||
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1
|
||||
- echo make clean
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
|
||||
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
|
||||
- make -j4
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||
|
||||
|
||||
|
@ -1,37 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/DisableWarnings.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef DISABLE_WARNINGS_H
|
||||
#define DISABLE_WARNINGS_H
|
||||
|
||||
//disables and intel compiler specific warning (in json.hpp)
|
||||
#pragma warning disable 488
|
||||
|
||||
|
||||
#endif
|
@ -1,29 +0,0 @@
|
||||
#ifndef GRID_STD_H
|
||||
#define GRID_STD_H
|
||||
|
||||
///////////////////
|
||||
// Std C++ dependencies
|
||||
///////////////////
|
||||
#include <cassert>
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <random>
|
||||
#include <functional>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <ctime>
|
||||
#include <sys/time.h>
|
||||
#include <chrono>
|
||||
#include <zlib.h>
|
||||
|
||||
///////////////////
|
||||
// Grid config
|
||||
///////////////////
|
||||
#include "Config.h"
|
||||
|
||||
#endif /* GRID_STD_H */
|
@ -1,14 +0,0 @@
|
||||
#pragma once
|
||||
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
|
||||
#ifdef USE_MKL
|
||||
#define EIGEN_USE_MKL_ALL
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
#include <Grid/Eigen/Dense>
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
@ -1,63 +0,0 @@
|
||||
extra_sources=
|
||||
extra_headers=
|
||||
|
||||
if BUILD_COMMS_MPI3
|
||||
extra_sources+=communicator/Communicator_mpi3.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
extra_sources+=communicator/SharedMemoryMPI.cc
|
||||
extra_sources+=communicator/SharedMemory.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_NONE
|
||||
extra_sources+=communicator/Communicator_none.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
extra_sources+=communicator/SharedMemoryNone.cc
|
||||
extra_sources+=communicator/SharedMemory.cc
|
||||
endif
|
||||
|
||||
if BUILD_HDF5
|
||||
extra_sources+=serialisation/Hdf5IO.cc
|
||||
extra_headers+=serialisation/Hdf5IO.h
|
||||
extra_headers+=serialisation/Hdf5Type.h
|
||||
endif
|
||||
|
||||
all: version-cache
|
||||
|
||||
version-cache:
|
||||
@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
|
||||
a="uncommited changes";\
|
||||
else\
|
||||
a="clean";\
|
||||
fi;\
|
||||
echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
|
||||
if [ -e version-cache ]; then\
|
||||
d=`diff vertmp version-cache`;\
|
||||
if [ "$${d}" != "" ]; then\
|
||||
mv vertmp version-cache;\
|
||||
rm -f Version.h;\
|
||||
fi;\
|
||||
else\
|
||||
mv vertmp version-cache;\
|
||||
rm -f Version.h;\
|
||||
fi;\
|
||||
rm -f vertmp
|
||||
|
||||
Version.h:
|
||||
cp version-cache Version.h
|
||||
|
||||
.PHONY: version-cache
|
||||
|
||||
#
|
||||
# Libraries
|
||||
#
|
||||
include Make.inc
|
||||
include Eigen.inc
|
||||
|
||||
lib_LIBRARIES = libGrid.a
|
||||
|
||||
CCFILES += $(extra_sources)
|
||||
HFILES += $(extra_headers) Config.h Version.h
|
||||
|
||||
libGrid_a_SOURCES = $(CCFILES)
|
||||
libGrid_adir = $(includedir)/Grid
|
||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
|
@ -1,152 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/Forecast.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef INCLUDED_FORECAST_H
|
||||
#define INCLUDED_FORECAST_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Abstract base class.
|
||||
// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
|
||||
// and returns a forecasted solution to the system D*psi = phi (psi).
|
||||
template<class Matrix, class Field>
|
||||
class Forecast
|
||||
{
|
||||
public:
|
||||
virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
|
||||
};
|
||||
|
||||
// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
|
||||
// used to forecast solutions across poles of the EOFA heatbath.
|
||||
//
|
||||
// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
|
||||
template<class Matrix, class Field>
|
||||
class ChronoForecast : public Forecast<Matrix,Field>
|
||||
{
|
||||
public:
|
||||
Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
|
||||
{
|
||||
int degree = prev_solns.size();
|
||||
Field chi(phi); // forecasted solution
|
||||
|
||||
// Trivial cases
|
||||
if(degree == 0){ chi = zero; return chi; }
|
||||
else if(degree == 1){ return prev_solns[0]; }
|
||||
|
||||
RealD dot;
|
||||
ComplexD xp;
|
||||
Field r(phi); // residual
|
||||
Field Mv(phi);
|
||||
std::vector<Field> v(prev_solns); // orthonormalized previous solutions
|
||||
std::vector<Field> MdagMv(degree,phi);
|
||||
|
||||
// Array to hold the matrix elements
|
||||
std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
|
||||
|
||||
// Solution and source vectors
|
||||
std::vector<ComplexD> a(degree);
|
||||
std::vector<ComplexD> b(degree);
|
||||
|
||||
// Orthonormalize the vector basis
|
||||
for(int i=0; i<degree; i++){
|
||||
v[i] *= 1.0/std::sqrt(norm2(v[i]));
|
||||
for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
|
||||
}
|
||||
|
||||
// Perform sparse matrix multiplication and construct rhs
|
||||
for(int i=0; i<degree; i++){
|
||||
b[i] = innerProduct(v[i],phi);
|
||||
Mat.M(v[i],Mv);
|
||||
Mat.Mdag(Mv,MdagMv[i]);
|
||||
G[i][i] = innerProduct(v[i],MdagMv[i]);
|
||||
}
|
||||
|
||||
// Construct the matrix
|
||||
for(int j=0; j<degree; j++){
|
||||
for(int k=j+1; k<degree; k++){
|
||||
G[j][k] = innerProduct(v[j],MdagMv[k]);
|
||||
G[k][j] = std::conj(G[j][k]);
|
||||
}}
|
||||
|
||||
// Gauss-Jordan elimination with partial pivoting
|
||||
for(int i=0; i<degree; i++){
|
||||
|
||||
// Perform partial pivoting
|
||||
int k = i;
|
||||
for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } }
|
||||
if(k != i){
|
||||
xp = b[k];
|
||||
b[k] = b[i];
|
||||
b[i] = xp;
|
||||
for(int j=0; j<degree; j++){
|
||||
xp = G[k][j];
|
||||
G[k][j] = G[i][j];
|
||||
G[i][j] = xp;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert matrix to upper triangular form
|
||||
for(int j=i+1; j<degree; j++){
|
||||
xp = G[j][i]/G[i][i];
|
||||
b[j] -= xp * b[i];
|
||||
for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
|
||||
}
|
||||
}
|
||||
|
||||
// Use Gaussian elimination to solve equations and calculate initial guess
|
||||
chi = zero;
|
||||
r = phi;
|
||||
for(int i=degree-1; i>=0; i--){
|
||||
a[i] = 0.0;
|
||||
for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
|
||||
a[i] = (b[i]-a[i])/G[i][i];
|
||||
chi += a[i]*v[i];
|
||||
r -= a[i]*MdagMv[i];
|
||||
}
|
||||
|
||||
RealD true_r(0.0);
|
||||
ComplexD tmp;
|
||||
for(int i=0; i<degree; i++){
|
||||
tmp = -b[i];
|
||||
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
|
||||
tmp = std::conj(tmp)*tmp;
|
||||
true_r += std::sqrt(tmp.real());
|
||||
}
|
||||
|
||||
RealD error = std::sqrt(norm2(r)/norm2(phi));
|
||||
std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
|
||||
|
||||
return chi;
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,698 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
|
||||
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Block conjugate gradient. Dimension zero should be the block direction
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class Field>
|
||||
class BlockConjugateGradient : public OperatorFunction<Field> {
|
||||
public:
|
||||
|
||||
typedef typename Field::scalar_type scomplex;
|
||||
|
||||
int blockDim ;
|
||||
int Nblock;
|
||||
|
||||
BlockCGtype CGtype;
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer PrintInterval; //GridLogMessages or Iterative
|
||||
|
||||
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
|
||||
{};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Thin QR factorisation (google it)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//Dimensions
|
||||
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
|
||||
//
|
||||
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
|
||||
//
|
||||
// Q C = R => Q = R C^{-1}
|
||||
//
|
||||
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
|
||||
//
|
||||
// Set C = L^{dag}, and then Q^dag Q = ident
|
||||
//
|
||||
// Checks:
|
||||
// Cdag C = Rdag R ; passes.
|
||||
// QdagQ = 1 ; passes
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
Field & Q,
|
||||
const Field & R)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
|
||||
// Force manifest hermitian to avoid rounding related
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Q = R C^{-1}
|
||||
//
|
||||
// Q_j = R_i Cinv(i,j)
|
||||
//
|
||||
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
sliceMulMatrix(Q,Cinv,R,Orthog);
|
||||
}
|
||||
// see comments above
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
std::vector<Field> & Q,
|
||||
const std::vector<Field> & R)
|
||||
{
|
||||
InnerProductMatrix(m_rr,R,R);
|
||||
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
|
||||
MulMatrix(Q,Cinv,R);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Call one of several implementations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQ ) {
|
||||
BlockCGrQsolve(Linop,Src,Psi);
|
||||
} else if (CGtype == CGmultiRHS ) {
|
||||
CGmultiRHSsolve(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQVec ) {
|
||||
BlockCGrQsolveVec(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQ implementation:
|
||||
//--------------------------
|
||||
// X is guess/Solution
|
||||
// B is RHS
|
||||
// Solve A X_i = B_i ; i refers to Nblock index
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
Nblock = B._grid->_fdimensions[Orthog];
|
||||
/* FAKE */
|
||||
Nblock=8;
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
X.checkerboard = B.checkerboard;
|
||||
conformable(X, B);
|
||||
|
||||
Field tmp(B);
|
||||
Field Q(B);
|
||||
Field D(B);
|
||||
Field Z(B);
|
||||
Field AD(B);
|
||||
|
||||
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,B,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,B,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,X,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||
************************************************************************
|
||||
* Dimensions:
|
||||
*
|
||||
* X,B==(Nferm x Nblock)
|
||||
* A==(Nferm x Nferm)
|
||||
*
|
||||
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
||||
*
|
||||
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
* for k:
|
||||
* Z = AD
|
||||
* M = [D^dag Z]^{-1}
|
||||
* X = X + D MC
|
||||
* QS = Q - ZM
|
||||
* D = Q + D S^dag
|
||||
* C = S C
|
||||
*/
|
||||
///////////////////////////////////////
|
||||
// Initial block: initial search dir is guess
|
||||
///////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
Linop.HermOp(X, AD);
|
||||
tmp = B - AD;
|
||||
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
D=Q;
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
//3. Z = AD
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(D, Z);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//6. QS = Q - ZM
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
|
||||
sliceMaddTimer.Stop();
|
||||
QRTimer.Start();
|
||||
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
|
||||
QRTimer.Stop();
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//8. C = S C
|
||||
m_C = m_S*m_C;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
m_rr = m_C.adjoint() * m_C;
|
||||
|
||||
RealD max_resid=0;
|
||||
RealD rrsum=0;
|
||||
RealD rr;
|
||||
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
rrsum+=real(m_rr(b,b));
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
|
||||
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(X, AD);
|
||||
AD = AD-B;
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// multiRHS conjugate gradient. Dimension zero should be the block direction
|
||||
// Use this for spread out across nodes
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim
|
||||
Nblock = Src._grid->_fdimensions[Orthog];
|
||||
|
||||
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
Psi.checkerboard = Src.checkerboard;
|
||||
conformable(Psi, Src);
|
||||
|
||||
Field P(Src);
|
||||
Field AP(Src);
|
||||
Field R(Src);
|
||||
|
||||
std::vector<ComplexD> v_pAp(Nblock);
|
||||
std::vector<RealD> v_rr (Nblock);
|
||||
std::vector<RealD> v_rr_inv(Nblock);
|
||||
std::vector<RealD> v_alpha(Nblock);
|
||||
std::vector<RealD> v_beta(Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,Src,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,Src,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,Psi,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
// Initial search dir is guess
|
||||
Linop.HermOp(Psi, AP);
|
||||
|
||||
R = Src - AP;
|
||||
P = R;
|
||||
sliceNorm(v_rr,R,Orthog);
|
||||
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch sliceNormTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
RealD rrsum=0;
|
||||
for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(P, AP);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
// Alpha
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductVector(v_pAp,P,AP,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
|
||||
}
|
||||
|
||||
// Psi, R update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
|
||||
sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
// Beta
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_rr_inv[b] = 1.0/v_rr[b];
|
||||
}
|
||||
sliceNormTimer.Start();
|
||||
sliceNorm(v_rr,R,Orthog);
|
||||
sliceNormTimer.Stop();
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_beta[b] = v_rr_inv[b] *v_rr[b];
|
||||
}
|
||||
|
||||
// Search update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddVector(P,v_beta,P,R,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
RealD max_resid=0;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
RealD rr = v_rr[b]/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(Psi, AP);
|
||||
AP = AP-Src;
|
||||
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
|
||||
for(int b=0;b<Nblock;b++){
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
m(b,bp) = innerProduct(X[b],Y[bp]);
|
||||
}}
|
||||
}
|
||||
void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
|
||||
// Should make this cache friendly with site outermost, parallel_for
|
||||
// Deal with case AP aliases with either Y or X
|
||||
std::vector<Field> tmp(Nblock,X[0]);
|
||||
for(int b=0;b<Nblock;b++){
|
||||
tmp[b] = Y[b];
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
|
||||
}
|
||||
}
|
||||
for(int b=0;b<Nblock;b++){
|
||||
AP[b] = tmp[b];
|
||||
}
|
||||
}
|
||||
void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
|
||||
// Should make this cache friendly with site outermost, parallel_for
|
||||
for(int b=0;b<Nblock;b++){
|
||||
AP[b] = zero;
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
AP[b] += (m(bp,b))*X[bp];
|
||||
}
|
||||
}
|
||||
}
|
||||
double normv(const std::vector<Field> &P){
|
||||
double nn = 0.0;
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
nn+=norm2(P[b]);
|
||||
}
|
||||
return nn;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQvec implementation:
|
||||
//--------------------------
|
||||
// X is guess/Solution
|
||||
// B is RHS
|
||||
// Solve A X_i = B_i ; i refers to Nblock index
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
|
||||
{
|
||||
Nblock = B.size();
|
||||
assert(Nblock == X.size());
|
||||
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
X[b].checkerboard = B[b].checkerboard;
|
||||
conformable(X[b], B[b]);
|
||||
conformable(X[b], X[0]);
|
||||
}
|
||||
|
||||
Field Fake(B[0]);
|
||||
|
||||
std::vector<Field> tmp(Nblock,Fake);
|
||||
std::vector<Field> Q(Nblock,Fake);
|
||||
std::vector<Field> D(Nblock,Fake);
|
||||
std::vector<Field> Z(Nblock,Fake);
|
||||
std::vector<Field> AD(Nblock,Fake);
|
||||
|
||||
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||
************************************************************************
|
||||
* Dimensions:
|
||||
*
|
||||
* X,B==(Nferm x Nblock)
|
||||
* A==(Nferm x Nferm)
|
||||
*
|
||||
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
||||
*
|
||||
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
* for k:
|
||||
* Z = AD
|
||||
* M = [D^dag Z]^{-1}
|
||||
* X = X + D MC
|
||||
* QS = Q - ZM
|
||||
* D = Q + D S^dag
|
||||
* C = S C
|
||||
*/
|
||||
///////////////////////////////////////
|
||||
// Initial block: initial search dir is guess
|
||||
///////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
Linop.HermOp(X[b], AD[b]);
|
||||
tmp[b] = B[b] - AD[b];
|
||||
}
|
||||
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
|
||||
for(int b=0;b<Nblock;b++) D[b]=Q[b];
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
//3. Z = AD
|
||||
MatrixTimer.Start();
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
InnerProductMatrix(m_DZ,D,Z);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(X,m_tmp, D,X);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//6. QS = Q - ZM
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(tmp,m_M,Z,Q,-1.0);
|
||||
sliceMaddTimer.Stop();
|
||||
QRTimer.Start();
|
||||
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
|
||||
QRTimer.Stop();
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(D,m_tmp,D,Q);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//8. C = S C
|
||||
m_C = m_S*m_C;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
m_rr = m_C.adjoint() * m_C;
|
||||
|
||||
RealD max_resid=0;
|
||||
RealD rrsum=0;
|
||||
RealD rr;
|
||||
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
rrsum+=real(m_rr(b,b));
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
|
||||
for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -1,256 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
|
||||
#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> {
|
||||
public:
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer ReliableUpdatesPerformed;
|
||||
|
||||
bool DoFinalCleanup; //Final DP cleanup, defaults to true
|
||||
Integer IterationsToCleanup; //Final DP cleanup step iterations
|
||||
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
GridBase* SinglePrecGrid;
|
||||
RealD Delta; //reliable update parameter
|
||||
|
||||
//Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
|
||||
LinearOperatorBase<FieldF> *Linop_fallback;
|
||||
RealD fallback_transition_tol;
|
||||
|
||||
|
||||
ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true)
|
||||
: Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
Delta(_delta),
|
||||
Linop_f(_Linop_f),
|
||||
Linop_d(_Linop_d),
|
||||
SinglePrecGrid(_sp_grid),
|
||||
ErrorOnNoConverge(err_on_no_conv),
|
||||
DoFinalCleanup(true),
|
||||
Linop_fallback(NULL)
|
||||
{};
|
||||
|
||||
void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
|
||||
Linop_fallback = &_Linop_fallback;
|
||||
fallback_transition_tol = _fallback_transition_tol;
|
||||
}
|
||||
|
||||
void operator()(const FieldD &src, FieldD &psi) {
|
||||
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
|
||||
bool using_fallback = false;
|
||||
|
||||
psi.checkerboard = src.checkerboard;
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||
|
||||
FieldD p(src);
|
||||
FieldD mmp(src);
|
||||
FieldD r(src);
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
|
||||
a = norm2(p);
|
||||
cp = a;
|
||||
ssq = norm2(src);
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl;
|
||||
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if (cp <= rsq) {
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
|
||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
//Single prec initialization
|
||||
FieldF r_f(SinglePrecGrid);
|
||||
r_f.checkerboard = r.checkerboard;
|
||||
precisionChange(r_f, r);
|
||||
|
||||
FieldF psi_f(r_f);
|
||||
psi_f = zero;
|
||||
|
||||
FieldF p_f(r_f);
|
||||
FieldF mmp_f(r_f);
|
||||
|
||||
RealD MaxResidSinceLastRelUp = cp; //initial residual
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(4)
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k = 0;
|
||||
int l = 0;
|
||||
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
a = c / d;
|
||||
b_pred = a * (a * qq - d) / c;
|
||||
|
||||
cp = axpy_norm(r_f, -a, mmp_f, r_f);
|
||||
b = cp / c;
|
||||
|
||||
// Fuse these loops ; should be really easy
|
||||
psi_f = a * p_f + psi_f;
|
||||
//p_f = p_f * b + r_f;
|
||||
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
|
||||
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
|
||||
|
||||
if(cp > MaxResidSinceLastRelUp){
|
||||
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl;
|
||||
MaxResidSinceLastRelUp = cp;
|
||||
}
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
//Although not written in the paper, I assume that I have to add on the final solution
|
||||
precisionChange(mmp, psi_f);
|
||||
psi = psi + mmp;
|
||||
|
||||
|
||||
SolverTimer.Stop();
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||
p = mmp - src;
|
||||
|
||||
RealD srcnorm = sqrt(norm2(src));
|
||||
RealD resnorm = sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
|
||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
ReliableUpdatesPerformed = l;
|
||||
|
||||
if(DoFinalCleanup){
|
||||
//Do a final CG to cleanup
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n";
|
||||
ConjugateGradient<FieldD> CG(Tolerance,MaxIterations);
|
||||
CG.ErrorOnNoConverge = ErrorOnNoConverge;
|
||||
CG(Linop_d,src,psi);
|
||||
IterationsToCleanup = CG.IterationsToComplete;
|
||||
}
|
||||
else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n";
|
||||
return;
|
||||
}
|
||||
else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
|
||||
<< cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
|
||||
precisionChange(mmp, psi_f);
|
||||
psi = psi + mmp;
|
||||
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||
r = src - mmp;
|
||||
|
||||
psi_f = zero;
|
||||
precisionChange(r_f, r);
|
||||
cp = norm2(r);
|
||||
MaxResidSinceLastRelUp = cp;
|
||||
|
||||
b = cp/c;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl;
|
||||
|
||||
l = l+1;
|
||||
}
|
||||
|
||||
p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence
|
||||
|
||||
if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl;
|
||||
Linop_f_use = Linop_fallback;
|
||||
using_fallback = true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge"
|
||||
<< std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
ReliableUpdatesPerformed = l;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
@ -1,104 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_DEFLATION_H
|
||||
#define GRID_DEFLATION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class ZeroGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = zero; };
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class SourceGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = src; };
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
// Fine grid deflation
|
||||
////////////////////////////////
|
||||
template<class Field>
|
||||
class DeflatedGuesser: public LinearFunction<Field> {
|
||||
private:
|
||||
const std::vector<Field> &evec;
|
||||
const std::vector<RealD> &eval;
|
||||
|
||||
public:
|
||||
|
||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
|
||||
|
||||
virtual void operator()(const Field &src,Field &guess) {
|
||||
guess = zero;
|
||||
assert(evec.size()==eval.size());
|
||||
auto N = evec.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
const Field& tmp = evec[i];
|
||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
||||
}
|
||||
guess.checkerboard = src.checkerboard;
|
||||
}
|
||||
};
|
||||
|
||||
template<class FineField, class CoarseField>
|
||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
|
||||
private:
|
||||
const std::vector<FineField> &subspace;
|
||||
const std::vector<CoarseField> &evec_coarse;
|
||||
const std::vector<RealD> &eval_coarse;
|
||||
public:
|
||||
|
||||
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
|
||||
const std::vector<CoarseField> &_evec_coarse,
|
||||
const std::vector<RealD> &_eval_coarse)
|
||||
: subspace(_subspace),
|
||||
evec_coarse(_evec_coarse),
|
||||
eval_coarse(_eval_coarse)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(const FineField &src,FineField &guess) {
|
||||
int N = (int)evec_coarse.size();
|
||||
CoarseField src_coarse(evec_coarse[0]._grid);
|
||||
CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero;
|
||||
blockProject(src_coarse,src,subspace);
|
||||
for (int i=0;i<N;i++) {
|
||||
const CoarseField & tmp = evec_coarse[i];
|
||||
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
|
||||
}
|
||||
blockPromote(guess_coarse,guess,subspace);
|
||||
guess.checkerboard = src.checkerboard;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@ -1,842 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_BIRL_H
|
||||
#define GRID_BIRL_H
|
||||
|
||||
#include <string.h> //memset
|
||||
//#include <zlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Move following 100 LOC to lattice/Lattice_basis.h
|
||||
////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
||||
{
|
||||
for(int j=0; j<k; ++j){
|
||||
auto ip = innerProduct(basis[j],w);
|
||||
w = w - ip*basis[j];
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0]._grid;
|
||||
|
||||
parallel_region
|
||||
{
|
||||
|
||||
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
|
||||
|
||||
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
|
||||
for(int j=j0; j<j1; ++j){
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B[j] +=Qt(j,k) * basis[k]._odata[ss];
|
||||
}
|
||||
}
|
||||
for(int j=j0; j<j1; ++j){
|
||||
basis[j]._odata[ss] = B[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract a single rotated vector
|
||||
template<class Field>
|
||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0]._grid;
|
||||
|
||||
result.checkerboard = basis[0].checkerboard;
|
||||
parallel_for(int ss=0;ss < grid->oSites();ss++){
|
||||
vobj B = zero;
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B +=Qt(j,k) * basis[k]._odata[ss];
|
||||
}
|
||||
result._odata[ss] = B;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
|
||||
{
|
||||
int vlen = idx.size();
|
||||
|
||||
assert(vlen>=1);
|
||||
assert(vlen<=sort_vals.size());
|
||||
assert(vlen<=_v.size());
|
||||
|
||||
for (size_t i=0;i<vlen;i++) {
|
||||
|
||||
if (idx[i] != i) {
|
||||
|
||||
//////////////////////////////////////
|
||||
// idx[i] is a table of desired sources giving a permutation.
|
||||
// Swap v[i] with v[idx[i]].
|
||||
// Find j>i for which _vnew[j] = _vold[i],
|
||||
// track the move idx[j] => idx[i]
|
||||
// track the move idx[i] => i
|
||||
//////////////////////////////////////
|
||||
size_t j;
|
||||
for (j=i;j<idx.size();j++)
|
||||
if (idx[j]==i)
|
||||
break;
|
||||
|
||||
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
|
||||
|
||||
std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy
|
||||
std::swap(sort_vals[i],sort_vals[idx[i]]);
|
||||
|
||||
idx[j] = idx[i];
|
||||
idx[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
|
||||
{
|
||||
std::vector<int> idx(sort_vals.size());
|
||||
std::iota(idx.begin(), idx.end(), 0);
|
||||
|
||||
// sort indexes based on comparing values in v
|
||||
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
|
||||
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
|
||||
{
|
||||
std::vector<int> idx = basisSortGetIndex(sort_vals);
|
||||
if (reverse)
|
||||
std::reverse(idx.begin(), idx.end());
|
||||
|
||||
basisReorderInPlace(_v,sort_vals,idx);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Implicitly restarted lanczos
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class ImplicitlyRestartedLanczosTester
|
||||
{
|
||||
public:
|
||||
virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
|
||||
virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
|
||||
};
|
||||
|
||||
enum IRLdiagonalisation {
|
||||
IRLdiagonaliseWithDSTEGR,
|
||||
IRLdiagonaliseWithQR,
|
||||
IRLdiagonaliseWithEigen
|
||||
};
|
||||
|
||||
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
|
||||
{
|
||||
public:
|
||||
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
|
||||
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
return TestConvergence(j,resid,B,eval,evalMaxApprox);
|
||||
}
|
||||
int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
Field v(B);
|
||||
RealD eval_poly = eval;
|
||||
// Apply operator
|
||||
_HermOp(B,v);
|
||||
|
||||
RealD vnum = real(innerProduct(B,v)); // HermOp.
|
||||
RealD vden = norm2(B);
|
||||
RealD vv0 = norm2(v);
|
||||
eval = vnum/vden;
|
||||
v -= eval*B;
|
||||
|
||||
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
|
||||
int conv=0;
|
||||
if( (vv<eresid*eresid) ) conv = 1;
|
||||
|
||||
return conv;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class ImplicitlyRestartedLanczos {
|
||||
private:
|
||||
const RealD small = 1.0e-8;
|
||||
int MaxIter;
|
||||
int MinRestart; // Minimum number of restarts; only check for convergence after
|
||||
int Nstop; // Number of evecs checked for convergence
|
||||
int Nk; // Number of converged sought
|
||||
// int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk
|
||||
int Nm; // Nm -- total number of vectors
|
||||
IRLdiagonalisation diagonalisation;
|
||||
int orth_period;
|
||||
|
||||
RealD OrthoTime;
|
||||
RealD eresid, betastp;
|
||||
////////////////////////////////
|
||||
// Embedded objects
|
||||
////////////////////////////////
|
||||
LinearFunction<Field> &_PolyOp;
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosTester<Field> &_Tester;
|
||||
// Default tester provided (we need a ref to something in default case)
|
||||
ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester;
|
||||
/////////////////////////
|
||||
// Constructor
|
||||
/////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// PAB:
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Too many options & knobs.
|
||||
// Eliminate:
|
||||
// orth_period
|
||||
// betastp
|
||||
// MinRestart
|
||||
//
|
||||
// Do we really need orth_period
|
||||
// What is the theoretical basis & guarantees of betastp ?
|
||||
// Nstop=Nk viable?
|
||||
// MinRestart avoidable with new convergence test?
|
||||
// Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation)
|
||||
// HermOp could be eliminated if we dropped the Power method for max eval.
|
||||
// -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear
|
||||
//////////////////////////////////////////////////////////////////
|
||||
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
|
||||
LinearFunction<Field> & HermOp,
|
||||
ImplicitlyRestartedLanczosTester<Field> & Tester,
|
||||
int _Nstop, // sought vecs
|
||||
int _Nk, // sought vecs
|
||||
int _Nm, // spare vecs
|
||||
RealD _eresid, // resid in lmdue deficit
|
||||
int _MaxIter, // Max iterations
|
||||
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
||||
int _MinRestart=1, int _orth_period = 1,
|
||||
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
||||
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
|
||||
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
||||
eresid(_eresid), betastp(_betastp),
|
||||
MaxIter(_MaxIter) , MinRestart(_MinRestart),
|
||||
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
|
||||
|
||||
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
|
||||
LinearFunction<Field> & HermOp,
|
||||
int _Nstop, // sought vecs
|
||||
int _Nk, // sought vecs
|
||||
int _Nm, // spare vecs
|
||||
RealD _eresid, // resid in lmdue deficit
|
||||
int _MaxIter, // Max iterations
|
||||
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
||||
int _MinRestart=1, int _orth_period = 1,
|
||||
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
||||
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester),
|
||||
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
||||
eresid(_eresid), betastp(_betastp),
|
||||
MaxIter(_MaxIter) , MinRestart(_MinRestart),
|
||||
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
|
||||
|
||||
////////////////////////////////
|
||||
// Helpers
|
||||
////////////////////////////////
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
RealD nn = norm2(v);
|
||||
nn = sqrt(nn);
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
|
||||
void orthogonalize(Field& w, std::vector<Field>& evec,int k)
|
||||
{
|
||||
OrthoTime-=usecond()/1e6;
|
||||
basisOrthogonalize(evec,w,k);
|
||||
normalise(w);
|
||||
OrthoTime+=usecond()/1e6;
|
||||
}
|
||||
|
||||
/* Rudy Arthur's thesis pp.137
|
||||
------------------------
|
||||
Require: M > K P = M − K †
|
||||
Compute the factorization AVM = VM HM + fM eM
|
||||
repeat
|
||||
Q=I
|
||||
for i = 1,...,P do
|
||||
QiRi =HM −θiI Q = QQi
|
||||
H M = Q †i H M Q i
|
||||
end for
|
||||
βK =HM(K+1,K) σK =Q(M,K)
|
||||
r=vK+1βK +rσK
|
||||
VK =VM(1:M)Q(1:M,1:K)
|
||||
HK =HM(1:K,1:K)
|
||||
→AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM
|
||||
until convergence
|
||||
*/
|
||||
void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=false)
|
||||
{
|
||||
GridBase *grid = src._grid;
|
||||
assert(grid == evec[0]._grid);
|
||||
|
||||
GridLogIRL.TimingMode(1);
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl;
|
||||
std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl;
|
||||
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl;
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl;
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl;
|
||||
}
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
|
||||
assert(Nm <= evec.size() && Nm <= eval.size());
|
||||
|
||||
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
|
||||
RealD evalMaxApprox = 0.0;
|
||||
{
|
||||
auto src_n = src;
|
||||
auto tmp = src;
|
||||
const int _MAX_ITER_IRL_MEVAPP_ = 50;
|
||||
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
|
||||
normalise(src_n);
|
||||
_HermOp(src_n,tmp);
|
||||
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
|
||||
RealD vden = norm2(src_n);
|
||||
RealD na = vnum/vden;
|
||||
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
|
||||
i=_MAX_ITER_IRL_MEVAPP_;
|
||||
evalMaxApprox = na;
|
||||
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
||||
src_n = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<RealD> lme(Nm);
|
||||
std::vector<RealD> lme2(Nm);
|
||||
std::vector<RealD> eval2(Nm);
|
||||
std::vector<RealD> eval2_copy(Nm);
|
||||
Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
|
||||
|
||||
Field f(grid);
|
||||
Field v(grid);
|
||||
int k1 = 1;
|
||||
int k2 = Nk;
|
||||
RealD beta_k;
|
||||
|
||||
Nconv = 0;
|
||||
|
||||
// Set initial vector
|
||||
evec[0] = src;
|
||||
normalise(evec[0]);
|
||||
|
||||
// Initial Nk steps
|
||||
OrthoTime=0.;
|
||||
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
|
||||
std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl;
|
||||
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// Restarting loop begins
|
||||
//////////////////////////////////
|
||||
int iter;
|
||||
for(iter = 0; iter<MaxIter; ++iter){
|
||||
|
||||
OrthoTime=0.;
|
||||
|
||||
std::cout<< GridLogMessage <<" **********************"<< std::endl;
|
||||
std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl;
|
||||
std::cout<< GridLogMessage <<" **********************"<< std::endl;
|
||||
|
||||
std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl;
|
||||
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
|
||||
f *= lme[Nm-1];
|
||||
|
||||
std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl;
|
||||
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// getting eigenvalues
|
||||
//////////////////////////////////
|
||||
for(int k=0; k<Nm; ++k){
|
||||
eval2[k] = eval[k+k1-1];
|
||||
lme2[k] = lme[k+k1-1];
|
||||
}
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
|
||||
std::cout<<GridLogIRL <<" diagonalized "<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// sorting
|
||||
//////////////////////////////////
|
||||
eval2_copy = eval2;
|
||||
std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
|
||||
std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
|
||||
const int chunk=8;
|
||||
for(int io=0; io<k2;io+=chunk){
|
||||
std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
|
||||
for(int ii=0;ii<chunk;ii++){
|
||||
if ( (io+ii)<k2 )
|
||||
std::cout<< " "<< std::setw(12)<< eval2[io+ii];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
// Implicitly shifted QR transformations
|
||||
//////////////////////////////////
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
for(int ip=k2; ip<Nm; ++ip){
|
||||
QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
|
||||
}
|
||||
std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
|
||||
|
||||
assert(k2<Nm); assert(k2<Nm); assert(k1>0);
|
||||
|
||||
basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis
|
||||
std::cout<<GridLogIRL <<"basisRotated by Qt"<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Compressed vector f and beta(k2)
|
||||
////////////////////////////////////////////////////
|
||||
f *= Qt(k2-1,Nm-1);
|
||||
f += lme[k2-1] * evec[k2];
|
||||
beta_k = norm2(f);
|
||||
beta_k = sqrt(beta_k);
|
||||
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
|
||||
|
||||
RealD betar = 1.0/beta_k;
|
||||
evec[k2] = betar * f;
|
||||
lme[k2-1] = beta_k;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Convergence test
|
||||
////////////////////////////////////////////////////
|
||||
for(int k=0; k<Nm; ++k){
|
||||
eval2[k] = eval[k];
|
||||
lme2[k] = lme[k];
|
||||
}
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
|
||||
std::cout<<GridLogIRL <<" Diagonalized "<<std::endl;
|
||||
|
||||
Nconv = 0;
|
||||
if (iter >= MinRestart) {
|
||||
|
||||
std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl;
|
||||
|
||||
Field B(grid); B.checkerboard = evec[0].checkerboard;
|
||||
|
||||
// power of two search pattern; not every evalue in eval2 is assessed.
|
||||
int allconv =1;
|
||||
for(int jj = 1; jj<=Nstop; jj*=2){
|
||||
int j = Nstop-jj;
|
||||
RealD e = eval2_copy[j]; // Discard the evalue
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
allconv=0;
|
||||
}
|
||||
}
|
||||
// Do evec[0] for good measure
|
||||
{
|
||||
int j=0;
|
||||
RealD e = eval2_copy[0];
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
|
||||
}
|
||||
if ( allconv ) Nconv = Nstop;
|
||||
|
||||
// test if we converged, if so, terminate
|
||||
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
|
||||
// if( Nconv>=Nstop || beta_k < betastp){
|
||||
if( Nconv>=Nstop){
|
||||
goto converged;
|
||||
}
|
||||
|
||||
} else {
|
||||
std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n";
|
||||
} // end of iter loop
|
||||
}
|
||||
|
||||
std::cout<<GridLogError<<"\n NOT converged.\n";
|
||||
abort();
|
||||
|
||||
converged:
|
||||
{
|
||||
Field B(grid); B.checkerboard = evec[0].checkerboard;
|
||||
basisRotate(evec,Qt,0,Nk,0,Nk,Nm);
|
||||
std::cout << GridLogIRL << " Rotated basis"<<std::endl;
|
||||
Nconv=0;
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Full final convergence test; unconditionally applied
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
for(int j = 0; j<=Nk; j++){
|
||||
B=evec[j];
|
||||
if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) {
|
||||
Nconv++;
|
||||
}
|
||||
}
|
||||
|
||||
if ( Nconv < Nstop )
|
||||
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
|
||||
|
||||
eval=eval2;
|
||||
|
||||
//Keep only converged
|
||||
eval.resize(Nconv);// Nstop?
|
||||
evec.resize(Nconv,grid);// Nstop?
|
||||
basisSortInPlace(evec,eval,reverse);
|
||||
|
||||
}
|
||||
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n";
|
||||
std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n";
|
||||
std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n";
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
/* Saad PP. 195
|
||||
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
|
||||
2. For k = 1,2,...,m Do:
|
||||
3. wk:=Avk−βkv_{k−1}
|
||||
4. αk:=(wk,vk) //
|
||||
5. wk:=wk−αkvk // wk orthog vk
|
||||
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||
7. vk+1 := wk/βk+1
|
||||
8. EndDo
|
||||
*/
|
||||
void step(std::vector<RealD>& lmd,
|
||||
std::vector<RealD>& lme,
|
||||
std::vector<Field>& evec,
|
||||
Field& w,int Nm,int k)
|
||||
{
|
||||
const RealD tiny = 1.0e-20;
|
||||
assert( k< Nm );
|
||||
|
||||
GridStopWatch gsw_op,gsw_o;
|
||||
|
||||
Field& evec_k = evec[k];
|
||||
|
||||
_PolyOp(evec_k,w); std::cout<<GridLogIRL << "PolyOp" <<std::endl;
|
||||
|
||||
if(k>0) w -= lme[k-1] * evec[k-1];
|
||||
|
||||
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
|
||||
RealD alph = real(zalph);
|
||||
|
||||
w = w - alph * evec_k;// 5. wk:=wk−αkvk
|
||||
|
||||
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||
// 7. vk+1 := wk/βk+1
|
||||
|
||||
lmd[k] = alph;
|
||||
lme[k] = beta;
|
||||
|
||||
if (k>0 && k % orth_period == 0) {
|
||||
orthogonalize(w,evec,k); // orthonormalise
|
||||
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
|
||||
}
|
||||
|
||||
if(k < Nm-1) evec[k+1] = w;
|
||||
|
||||
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
|
||||
if ( beta < tiny )
|
||||
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
|
||||
}
|
||||
|
||||
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt, // Nm x Nm
|
||||
GridBase *grid)
|
||||
{
|
||||
Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk);
|
||||
|
||||
for(int i=0;i<Nk;i++) TriDiag(i,i) = lmd[i];
|
||||
for(int i=0;i<Nk-1;i++) TriDiag(i,i+1) = lme[i];
|
||||
for(int i=0;i<Nk-1;i++) TriDiag(i+1,i) = lme[i];
|
||||
|
||||
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(TriDiag);
|
||||
|
||||
for (int i = 0; i < Nk; i++) {
|
||||
lmd[Nk-1-i] = eigensolver.eigenvalues()(i);
|
||||
}
|
||||
for (int i = 0; i < Nk; i++) {
|
||||
for (int j = 0; j < Nk; j++) {
|
||||
Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// File could end here if settle on Eigen ??? !!!
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
void QR_decomp(std::vector<RealD>& lmd, // Nm
|
||||
std::vector<RealD>& lme, // Nm
|
||||
int Nk, int Nm, // Nk, Nm
|
||||
Eigen::MatrixXd& Qt, // Nm x Nm matrix
|
||||
RealD Dsh, int kmin, int kmax)
|
||||
{
|
||||
int k = kmin-1;
|
||||
RealD x;
|
||||
|
||||
RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
|
||||
RealD c = ( lmd[k] -Dsh) *Fden;
|
||||
RealD s = -lme[k] *Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k+1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||
x =-s*lme[k+1];
|
||||
lme[k+1] = c*lme[k+1];
|
||||
|
||||
for(int i=0; i<Nk; ++i){
|
||||
RealD Qtmp1 = Qt(k,i);
|
||||
RealD Qtmp2 = Qt(k+1,i);
|
||||
Qt(k,i) = c*Qtmp1 - s*Qtmp2;
|
||||
Qt(k+1,i)= s*Qtmp1 + c*Qtmp2;
|
||||
}
|
||||
|
||||
// Givens transformations
|
||||
for(int k = kmin; k < kmax-1; ++k){
|
||||
|
||||
RealD Fden = 1.0/hypot(x,lme[k-1]);
|
||||
RealD c = lme[k-1]*Fden;
|
||||
RealD s = - x*Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k+1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||
lme[k-1] = c*lme[k-1] -s*x;
|
||||
|
||||
if(k != kmax-2){
|
||||
x = -s*lme[k+1];
|
||||
lme[k+1] = c*lme[k+1];
|
||||
}
|
||||
|
||||
for(int i=0; i<Nk; ++i){
|
||||
RealD Qtmp1 = Qt(k,i);
|
||||
RealD Qtmp2 = Qt(k+1,i);
|
||||
Qt(k,i) = c*Qtmp1 -s*Qtmp2;
|
||||
Qt(k+1,i) = s*Qtmp1 +c*Qtmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void diagonalize(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
|
||||
diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
|
||||
diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
|
||||
diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_LAPACK
|
||||
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
|
||||
double *vl, double *vu, int *il, int *iu, double *abstol,
|
||||
int *m, double *w, double *z, int *ldz, int *isuppz,
|
||||
double *work, int *lwork, int *iwork, int *liwork,
|
||||
int *info);
|
||||
#endif
|
||||
|
||||
void diagonalize_lapack(std::vector<RealD>& lmd,
|
||||
std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd& Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
#ifdef USE_LAPACK
|
||||
const int size = Nm;
|
||||
int NN = Nk;
|
||||
double evals_tmp[NN];
|
||||
double evec_tmp[NN][NN];
|
||||
memset(evec_tmp[0],0,sizeof(double)*NN*NN);
|
||||
double DD[NN];
|
||||
double EE[NN];
|
||||
for (int i = 0; i< NN; i++) {
|
||||
for (int j = i - 1; j <= i + 1; j++) {
|
||||
if ( j < NN && j >= 0 ) {
|
||||
if (i==j) DD[i] = lmd[i];
|
||||
if (i==j) evals_tmp[i] = lmd[i];
|
||||
if (j==(i-1)) EE[j] = lme[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
int evals_found;
|
||||
int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
|
||||
int liwork = 3+NN*10 ;
|
||||
int iwork[liwork];
|
||||
double work[lwork];
|
||||
int isuppz[2*NN];
|
||||
char jobz = 'V'; // calculate evals & evecs
|
||||
char range = 'I'; // calculate all evals
|
||||
// char range = 'A'; // calculate all evals
|
||||
char uplo = 'U'; // refer to upper half of original matrix
|
||||
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
|
||||
int ifail[NN];
|
||||
int info;
|
||||
int total = grid->_Nprocessors;
|
||||
int node = grid->_processor;
|
||||
int interval = (NN/total)+1;
|
||||
double vl = 0.0, vu = 0.0;
|
||||
int il = interval*node+1 , iu = interval*(node+1);
|
||||
if (iu > NN) iu=NN;
|
||||
double tol = 0.0;
|
||||
if (1) {
|
||||
memset(evals_tmp,0,sizeof(double)*NN);
|
||||
if ( il <= NN){
|
||||
LAPACK_dstegr(&jobz, &range, &NN,
|
||||
(double*)DD, (double*)EE,
|
||||
&vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
|
||||
&tol, // tolerance
|
||||
&evals_found, evals_tmp, (double*)evec_tmp, &NN,
|
||||
isuppz,
|
||||
work, &lwork, iwork, &liwork,
|
||||
&info);
|
||||
for (int i = iu-1; i>= il-1; i--){
|
||||
evals_tmp[i] = evals_tmp[i - (il-1)];
|
||||
if (il>1) evals_tmp[i-(il-1)]=0.;
|
||||
for (int j = 0; j< NN; j++){
|
||||
evec_tmp[i][j] = evec_tmp[i - (il-1)][j];
|
||||
if (il>1) evec_tmp[i-(il-1)][j]=0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
grid->GlobalSumVector(evals_tmp,NN);
|
||||
grid->GlobalSumVector((double*)evec_tmp,NN*NN);
|
||||
}
|
||||
}
|
||||
// Safer to sort instead of just reversing it,
|
||||
// but the document of the routine says evals are sorted in increasing order.
|
||||
// qr gives evals in decreasing order.
|
||||
for(int i=0;i<NN;i++){
|
||||
lmd [NN-1-i]=evals_tmp[i];
|
||||
for(int j=0;j<NN;j++){
|
||||
Qt((NN-1-i),j)=evec_tmp[i][j];
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
int QRiter = 100*Nm;
|
||||
int kmin = 1;
|
||||
int kmax = Nk;
|
||||
|
||||
// (this should be more sophisticated)
|
||||
for(int iter=0; iter<QRiter; ++iter){
|
||||
|
||||
// determination of 2x2 leading submatrix
|
||||
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
|
||||
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
|
||||
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
|
||||
// (Dsh: shift)
|
||||
|
||||
// transformation
|
||||
QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
|
||||
|
||||
// Convergence criterion (redef of kmin and kamx)
|
||||
for(int j=kmax-1; j>= kmin; --j){
|
||||
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
|
||||
if(fabs(lme[j-1])+dds > dds){
|
||||
kmax = j+1;
|
||||
goto continued;
|
||||
}
|
||||
}
|
||||
QRiter = iter;
|
||||
return;
|
||||
|
||||
continued:
|
||||
for(int j=0; j<kmax-1; ++j){
|
||||
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
|
||||
if(fabs(lme[j])+dds > dds){
|
||||
kmin = j+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n";
|
||||
abort();
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,406 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LOCAL_COHERENCE_IRL_H
|
||||
#define GRID_LOCAL_COHERENCE_IRL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
struct LanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
|
||||
ChebyParams, Cheby,/*Chebyshev*/
|
||||
int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/
|
||||
int, Nk, /*Vecs in Lanczos seek converge*/
|
||||
int, Nm, /*Total vecs in Lanczos include restart*/
|
||||
RealD, resid, /*residual*/
|
||||
int, MaxIt,
|
||||
RealD, betastp, /* ? */
|
||||
int, MinRes); // Must restart
|
||||
};
|
||||
|
||||
struct LocalCoherenceLanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
|
||||
bool, saveEvecs,
|
||||
bool, doFine,
|
||||
bool, doFineRead,
|
||||
bool, doCoarse,
|
||||
bool, doCoarseRead,
|
||||
LanczosParams, FineParams,
|
||||
LanczosParams, CoarseParams,
|
||||
ChebyParams, Smoother,
|
||||
RealD , coarse_relax_tol,
|
||||
std::vector<int>, blockSize,
|
||||
std::string, config,
|
||||
std::vector < std::complex<double> >, omega,
|
||||
RealD, mass,
|
||||
RealD, M5);
|
||||
};
|
||||
|
||||
// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
|
||||
_Linop(linop), subspace(_subspace)
|
||||
{
|
||||
assert(subspace.size() >0);
|
||||
};
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
GridBase *FineGrid = subspace[0]._grid;
|
||||
int checkerboard = subspace[0].checkerboard;
|
||||
|
||||
FineField fin (FineGrid); fin.checkerboard= checkerboard;
|
||||
FineField fout(FineGrid); fout.checkerboard = checkerboard;
|
||||
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
|
||||
OperatorFunction<FineField> & _poly;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
|
||||
LinearOperatorBase<FineField>& linop,
|
||||
std::vector<FineField> & _subspace) :
|
||||
_poly(poly),
|
||||
_Linop(linop),
|
||||
subspace(_subspace)
|
||||
{ };
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
|
||||
GridBase *FineGrid = subspace[0]._grid;
|
||||
int checkerboard = subspace[0].checkerboard;
|
||||
|
||||
FineField fin (FineGrid); fin.checkerboard =checkerboard;
|
||||
FineField fout(FineGrid);fout.checkerboard =checkerboard;
|
||||
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > >
|
||||
{
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearFunction<CoarseField> & _Poly;
|
||||
OperatorFunction<FineField> & _smoother;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
RealD _coarse_relax_tol;
|
||||
std::vector<FineField> &_subspace;
|
||||
|
||||
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
|
||||
OperatorFunction<FineField> &smoother,
|
||||
LinearOperatorBase<FineField> &Linop,
|
||||
std::vector<FineField> &subspace,
|
||||
RealD coarse_relax_tol=5.0e3)
|
||||
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
|
||||
_coarse_relax_tol(coarse_relax_tol)
|
||||
{ };
|
||||
|
||||
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
CoarseField v(B);
|
||||
RealD eval_poly = eval;
|
||||
|
||||
// Apply operator
|
||||
_Poly(B,v);
|
||||
|
||||
RealD vnum = real(innerProduct(B,v)); // HermOp.
|
||||
RealD vden = norm2(B);
|
||||
RealD vv0 = norm2(v);
|
||||
eval = vnum/vden;
|
||||
v -= eval*B;
|
||||
|
||||
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
|
||||
int conv=0;
|
||||
if( (vv<eresid*eresid) ) conv = 1;
|
||||
return conv;
|
||||
}
|
||||
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
GridBase *FineGrid = _subspace[0]._grid;
|
||||
int checkerboard = _subspace[0].checkerboard;
|
||||
FineField fB(FineGrid);fB.checkerboard =checkerboard;
|
||||
FineField fv(FineGrid);fv.checkerboard =checkerboard;
|
||||
|
||||
blockPromote(B,fv,_subspace);
|
||||
|
||||
_smoother(_Linop,fv,fB);
|
||||
|
||||
RealD eval_poly = eval;
|
||||
_Linop.HermOp(fB,fv);
|
||||
|
||||
RealD vnum = real(innerProduct(fB,fv)); // HermOp.
|
||||
RealD vden = norm2(fB);
|
||||
RealD vv0 = norm2(fv);
|
||||
eval = vnum/vden;
|
||||
fv -= eval*fB;
|
||||
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
|
||||
if( (vv<eresid*eresid) ) return 1;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Make serializable Lanczos params
|
||||
////////////////////////////////////////////
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class LocalCoherenceLanczos
|
||||
{
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
protected:
|
||||
GridBase *_CoarseGrid;
|
||||
GridBase *_FineGrid;
|
||||
int _checkerboard;
|
||||
LinearOperatorBase<FineField> & _FineOp;
|
||||
|
||||
std::vector<RealD> &evals_fine;
|
||||
std::vector<RealD> &evals_coarse;
|
||||
std::vector<FineField> &subspace;
|
||||
std::vector<CoarseField> &evec_coarse;
|
||||
|
||||
private:
|
||||
std::vector<RealD> _evals_fine;
|
||||
std::vector<RealD> _evals_coarse;
|
||||
std::vector<FineField> _subspace;
|
||||
std::vector<CoarseField> _evec_coarse;
|
||||
|
||||
public:
|
||||
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (_evals_fine),
|
||||
evals_coarse(_evals_coarse),
|
||||
subspace (_subspace),
|
||||
evec_coarse(_evec_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Alternate constructore, external storage for use by Hadrons module
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard,
|
||||
std::vector<FineField> &ext_subspace,
|
||||
std::vector<CoarseField> &ext_coarse,
|
||||
std::vector<RealD> &ext_eval_fine,
|
||||
std::vector<RealD> &ext_eval_coarse
|
||||
) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (ext_eval_fine),
|
||||
evals_coarse(ext_eval_coarse),
|
||||
subspace (ext_subspace),
|
||||
evec_coarse (ext_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
|
||||
void Orthogonalise(void ) {
|
||||
CoarseScalar InnerProd(_CoarseGrid);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
};
|
||||
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
RealD nn = norm2(v);
|
||||
nn = ::sqrt(nn);
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
/*
|
||||
void fakeFine(void)
|
||||
{
|
||||
int Nk = nbasis;
|
||||
subspace.resize(Nk,_FineGrid);
|
||||
subspace[0]=1.0;
|
||||
subspace[0].checkerboard=_checkerboard;
|
||||
normalise(subspace[0]);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
for(int k=1;k<Nk;k++){
|
||||
subspace[k].checkerboard=_checkerboard;
|
||||
Op(subspace[k-1],subspace[k]);
|
||||
normalise(subspace[k]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
void testFine(RealD resid)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
|
||||
for(int k=0;k<nbasis;k++){
|
||||
assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
|
||||
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
for(int k=0;k<evec_coarse.size();k++){
|
||||
if ( k < nbasis ) {
|
||||
assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1);
|
||||
} else {
|
||||
assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
assert(nbasis<=Nm);
|
||||
Chebyshev<FineField> Cheby(cheby_parms);
|
||||
FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
|
||||
evals_fine.resize(Nm);
|
||||
subspace.resize(Nm,_FineGrid);
|
||||
|
||||
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
|
||||
FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(evals_fine,subspace,src,Nconv,false);
|
||||
|
||||
// Shrink down to number saved
|
||||
assert(Nstop>=nbasis);
|
||||
assert(Nconv>=nbasis);
|
||||
evals_fine.resize(nbasis);
|
||||
subspace.resize(nbasis,_FineGrid);
|
||||
}
|
||||
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
|
||||
int Nstop, int Nk, int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
Chebyshev<FineField> Cheby(cheby_op);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
evals_coarse.resize(Nm);
|
||||
evec_coarse.resize(Nm,_CoarseGrid);
|
||||
|
||||
CoarseField src(_CoarseGrid); src=1.0;
|
||||
|
||||
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
int Nconv=0;
|
||||
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
|
||||
assert(Nconv>=Nstop);
|
||||
evals_coarse.resize(Nstop);
|
||||
evec_coarse.resize (Nstop,_CoarseGrid);
|
||||
for (int i=0;i<Nstop;i++){
|
||||
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -1,473 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_SCHUR_RED_BLACK_H
|
||||
#define GRID_SCHUR_RED_BLACK_H
|
||||
|
||||
|
||||
/*
|
||||
* Red black Schur decomposition
|
||||
*
|
||||
* M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
|
||||
* (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
|
||||
* = L D U
|
||||
*
|
||||
* L^-1 = (1 0 )
|
||||
* (-MoeMee^{-1} 1 )
|
||||
* L^{dag} = ( 1 Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
* L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
*
|
||||
* U^-1 = (1 -Mee^{-1} Meo)
|
||||
* (0 1 )
|
||||
* U^{dag} = ( 1 0)
|
||||
* (Meo^dag Mee^{-dag} 1)
|
||||
* U^{-dag} = ( 1 0)
|
||||
* (-Meo^dag Mee^{-dag} 1)
|
||||
***********************
|
||||
* M psi = eta
|
||||
***********************
|
||||
*Odd
|
||||
* i) D_oo psi_o = L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Wilson:
|
||||
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
|
||||
* Stag:
|
||||
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* L^-1 eta_o= (1 0 ) (e
|
||||
* (-MoeMee^{-1} 1 )
|
||||
*
|
||||
*Even
|
||||
* ii) Mee psi_e + Meo psi_o = src_e
|
||||
*
|
||||
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
*
|
||||
*
|
||||
* TODO: Other options:
|
||||
*
|
||||
* a) change checkerboards for Schur e<->o
|
||||
*
|
||||
* Left precon by Moo^-1
|
||||
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Right precon by Moo^-1
|
||||
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
|
||||
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
* psi_o = M_oo^-1 phi_o
|
||||
* TODO: Deflation
|
||||
*/
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Use base class to share code
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackBase {
|
||||
protected:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise = 0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Shared code
|
||||
/////////////////////////////////////////////////////////////
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
|
||||
{
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
|
||||
template<class Guesser>
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
int nblock = in.size();
|
||||
|
||||
std::vector<Field> src_o(nblock,grid);
|
||||
std::vector<Field> sol_o(nblock,grid);
|
||||
|
||||
std::vector<Field> guess_save;
|
||||
|
||||
Field resid(fgrid);
|
||||
Field tmp(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Prepare RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++){
|
||||
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Make the guesses
|
||||
////////////////////////////////////////////////
|
||||
if ( subGuess ) guess_save.resize(nblock,grid);
|
||||
|
||||
for(int b=0;b<nblock;b++){
|
||||
guess(src_o[b],sol_o[b]);
|
||||
|
||||
if ( subGuess ) {
|
||||
guess_save[b] = sol_o[b];
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the block solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// A2A boolean behavioural control & reconstruct other checkerboard
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++) {
|
||||
|
||||
if (subGuess) sol_o[b] = sol_o[b] - guess_save[b];
|
||||
|
||||
///////// Needs even source //////////////
|
||||
pickCheckerboard(Even,tmp,in[b]);
|
||||
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Check unprec residual if possible
|
||||
/////////////////////////////////////////////////
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out[b],resid);
|
||||
resid = resid-in[b];
|
||||
RealD ns = norm2(in[b]);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
template<class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field resid(fgrid);
|
||||
Field src_o(grid);
|
||||
Field src_e(grid);
|
||||
Field sol_o(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
RedBlackSource(_Matrix,in,src_e,src_o);
|
||||
|
||||
////////////////////////////////
|
||||
// Construct the guess
|
||||
////////////////////////////////
|
||||
Field tmp(grid);
|
||||
guess(src_o,sol_o);
|
||||
|
||||
Field guess_save(grid);
|
||||
guess_save = sol_o;
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Fionn A2A boolean behavioural control
|
||||
////////////////////////////////////////////////
|
||||
if (subGuess) sol_o= sol_o-guess_save;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// RedBlack solution needs the even source
|
||||
///////////////////////////////////////////////////
|
||||
RedBlackSolution(_Matrix,sol_o,src_e,out);
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Override in derived. Not virtual as template methods
|
||||
/////////////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0;
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)=0;
|
||||
|
||||
};
|
||||
|
||||
template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess)
|
||||
{
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e(grid);
|
||||
|
||||
src_e = src_e_c; // Const correctness
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal has Mooee on it.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e_i(grid);
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e_i = src_e-tmp; assert( src_e_i.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal is identity, right preconditioned by Mee^inv
|
||||
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
|
||||
//=> psi = MeeInv phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};
|
||||
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field sol_o_i(grid);
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// MooeeInv due to pecond
|
||||
////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(sol_o,tmp);
|
||||
sol_o_i = tmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o_i,tmp); assert( tmp.checkerboard ==Even);
|
||||
tmp = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(sol,sol_o_i); assert( sol_o_i.checkerboard ==Odd );
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,125 +0,0 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
MemoryStats *MemoryProfiler::stats = nullptr;
|
||||
bool MemoryProfiler::debug = false;
|
||||
|
||||
int PointerCache::victim;
|
||||
|
||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
|
||||
|
||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||
|
||||
if (bytes < 4096 ) return ptr;
|
||||
|
||||
#ifdef GRID_OMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
void * ret = NULL;
|
||||
int v = -1;
|
||||
|
||||
for(int e=0;e<Ncache;e++) {
|
||||
if ( Entries[e].valid==0 ) {
|
||||
v=e;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( v==-1 ) {
|
||||
v=victim;
|
||||
victim = (victim+1)%Ncache;
|
||||
}
|
||||
|
||||
if ( Entries[v].valid ) {
|
||||
ret = Entries[v].address;
|
||||
Entries[v].valid = 0;
|
||||
Entries[v].address = NULL;
|
||||
Entries[v].bytes = 0;
|
||||
}
|
||||
|
||||
Entries[v].address=ptr;
|
||||
Entries[v].bytes =bytes;
|
||||
Entries[v].valid =1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *PointerCache::Lookup(size_t bytes) {
|
||||
|
||||
if (bytes < 4096 ) return NULL;
|
||||
|
||||
#ifdef _OPENMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
for(int e=0;e<Ncache;e++){
|
||||
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
|
||||
Entries[e].valid = 0;
|
||||
return Entries[e].address;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
void check_huge_pages(void *Buf,uint64_t BYTES)
|
||||
{
|
||||
#ifdef __linux__
|
||||
int fd = open("/proc/self/pagemap", O_RDONLY);
|
||||
assert(fd >= 0);
|
||||
const int page_size = 4096;
|
||||
uint64_t virt_pfn = (uint64_t)Buf / page_size;
|
||||
off_t offset = sizeof(uint64_t) * virt_pfn;
|
||||
uint64_t npages = (BYTES + page_size-1) / page_size;
|
||||
uint64_t pagedata[npages];
|
||||
uint64_t ret = lseek(fd, offset, SEEK_SET);
|
||||
assert(ret == offset);
|
||||
ret = ::read(fd, pagedata, sizeof(uint64_t)*npages);
|
||||
assert(ret == sizeof(uint64_t) * npages);
|
||||
int nhugepages = npages / 512;
|
||||
int n4ktotal, nnothuge;
|
||||
n4ktotal = 0;
|
||||
nnothuge = 0;
|
||||
for (int i = 0; i < nhugepages; ++i) {
|
||||
uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size;
|
||||
for (int j = 0; j < 512; ++j) {
|
||||
uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size;
|
||||
++n4ktotal;
|
||||
if (pageaddr != baseaddr + j * page_size)
|
||||
++nnothuge;
|
||||
}
|
||||
}
|
||||
int rank = CartesianCommunicator::RankWorld();
|
||||
printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string sizeString(const size_t bytes)
|
||||
{
|
||||
constexpr unsigned int bufSize = 256;
|
||||
const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
|
||||
char buf[256];
|
||||
size_t s = 0;
|
||||
double count = bytes;
|
||||
|
||||
while (count >= 1024 && s < 7)
|
||||
{
|
||||
s++;
|
||||
count /= 1024;
|
||||
}
|
||||
if (count - floor(count) == 0.0)
|
||||
{
|
||||
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
|
||||
}
|
||||
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
}
|
@ -1,174 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cartesian/Cartesian_full.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CARTESIAN_FULL_H
|
||||
#define GRID_CARTESIAN_FULL_H
|
||||
|
||||
namespace Grid{
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Grid Support.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class GridCartesian: public GridBase {
|
||||
|
||||
public:
|
||||
int dummy;
|
||||
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardFromOindex (int Oindex)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoarded(int dim){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoard(const std::vector<int> &site){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardDestination(int cb,int shift,int dim){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift, int ocb){
|
||||
return shift;
|
||||
}
|
||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
|
||||
return shift;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Constructor takes a parent grid and possibly subdivides communicator.
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
GridCartesian(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid,
|
||||
const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
GridCartesian(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid,
|
||||
const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Construct from comm world
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
GridCartesian(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid) : GridBase(processor_grid)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
|
||||
virtual ~GridCartesian() = default;
|
||||
|
||||
void Init(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid)
|
||||
{
|
||||
///////////////////////
|
||||
// Grid information
|
||||
///////////////////////
|
||||
_isCheckerBoarded = false;
|
||||
_ndimension = dimensions.size();
|
||||
|
||||
_fdimensions.resize(_ndimension);
|
||||
_gdimensions.resize(_ndimension);
|
||||
_ldimensions.resize(_ndimension);
|
||||
_rdimensions.resize(_ndimension);
|
||||
_simd_layout.resize(_ndimension);
|
||||
_lstart.resize(_ndimension);
|
||||
_lend.resize(_ndimension);
|
||||
|
||||
_ostride.resize(_ndimension);
|
||||
_istride.resize(_ndimension);
|
||||
|
||||
_fsites = _gsites = _osites = _isites = 1;
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
_fdimensions[d] = dimensions[d]; // Global dimensions
|
||||
_gdimensions[d] = _fdimensions[d]; // Global dimensions
|
||||
_simd_layout[d] = simd_layout[d];
|
||||
_fsites = _fsites * _fdimensions[d];
|
||||
_gsites = _gsites * _gdimensions[d];
|
||||
|
||||
// Use a reduced simd grid
|
||||
_ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
|
||||
//std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl;
|
||||
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
|
||||
|
||||
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
|
||||
assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
|
||||
|
||||
_lstart[d] = _processor_coor[d] * _ldimensions[d];
|
||||
_lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
|
||||
_osites *= _rdimensions[d];
|
||||
_isites *= _simd_layout[d];
|
||||
|
||||
// Addressing support
|
||||
if (d == 0)
|
||||
{
|
||||
_ostride[d] = 1;
|
||||
_istride[d] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
_ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
|
||||
_istride[d] = _istride[d - 1] * _simd_layout[d - 1];
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// subplane information
|
||||
///////////////////////
|
||||
_slice_block.resize(_ndimension);
|
||||
_slice_stride.resize(_ndimension);
|
||||
_slice_nblock.resize(_ndimension);
|
||||
|
||||
int block = 1;
|
||||
int nblock = 1;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
nblock *= _rdimensions[d];
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
nblock /= _rdimensions[d];
|
||||
_slice_block[d] = block;
|
||||
_slice_stride[d] = _ostride[d] * _rdimensions[d];
|
||||
_slice_nblock[d] = nblock;
|
||||
block = block * _rdimensions[d];
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,514 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/Communicator_mpi.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/communicator/SharedMemory.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
Grid_MPI_Comm CartesianCommunicator::communicator_world;
|
||||
|
||||
////////////////////////////////////////////
|
||||
// First initialise of comms system
|
||||
////////////////////////////////////////////
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv)
|
||||
{
|
||||
|
||||
int flag;
|
||||
int provided;
|
||||
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
|
||||
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
|
||||
if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
|
||||
(nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
|
||||
assert(0);
|
||||
}
|
||||
|
||||
Grid_quiesce_nodes();
|
||||
|
||||
// Never clean up as done once.
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
|
||||
GlobalSharedMemory::Init(communicator_world);
|
||||
GlobalSharedMemory::SharedMemoryAllocate(
|
||||
GlobalSharedMemory::MAX_MPI_SHM_BYTES,
|
||||
GlobalSharedMemory::Hugepages);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Use cartesian communicators now even in MPI3
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
|
||||
assert(ierr==0);
|
||||
}
|
||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
||||
{
|
||||
int rank;
|
||||
int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
|
||||
assert(ierr==0);
|
||||
return rank;
|
||||
}
|
||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||
{
|
||||
coor.resize(_ndimension);
|
||||
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Initialises from communicator_world
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
MPI_Comm optimal_comm;
|
||||
////////////////////////////////////////////////////
|
||||
// Remap using the shared memory optimising routine
|
||||
// The remap creates a comm which must be freed
|
||||
////////////////////////////////////////////////////
|
||||
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
|
||||
InitFromMPICommunicator(processors,optimal_comm);
|
||||
SetCommunicator(optimal_comm);
|
||||
///////////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////////
|
||||
MPI_Comm_free(&optimal_comm);
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
// Try to subdivide communicator
|
||||
//////////////////////////////////
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
|
||||
{
|
||||
_ndimension = processors.size();
|
||||
|
||||
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
|
||||
std::vector<int> parent_processor_coor(_ndimension,0);
|
||||
std::vector<int> parent_processors (_ndimension,1);
|
||||
|
||||
// Can make 5d grid from 4d etc...
|
||||
int pad = _ndimension-parent_ndimension;
|
||||
for(int d=0;d<parent_ndimension;d++){
|
||||
parent_processor_coor[pad+d]=parent._processor_coor[d];
|
||||
parent_processors [pad+d]=parent._processors[d];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// split the communicator
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// int Nparent = parent._processors ;
|
||||
// std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
|
||||
int Nparent;
|
||||
MPI_Comm_size(parent.communicator,&Nparent);
|
||||
// std::cout << " Parent size "<<Nparent <<std::endl;
|
||||
|
||||
int childsize=1;
|
||||
for(int d=0;d<processors.size();d++) {
|
||||
childsize *= processors[d];
|
||||
}
|
||||
int Nchild = Nparent/childsize;
|
||||
assert (childsize * Nchild == Nparent);
|
||||
|
||||
// std::cout << " child size "<<childsize <<std::endl;
|
||||
|
||||
std::vector<int> ccoor(_ndimension); // coor within subcommunicator
|
||||
std::vector<int> scoor(_ndimension); // coor of split within parent
|
||||
std::vector<int> ssize(_ndimension); // coor of split within parent
|
||||
|
||||
for(int d=0;d<_ndimension;d++){
|
||||
ccoor[d] = parent_processor_coor[d] % processors[d];
|
||||
scoor[d] = parent_processor_coor[d] / processors[d];
|
||||
ssize[d] = parent_processors[d] / processors[d];
|
||||
}
|
||||
|
||||
// rank within subcomm ; srank is rank of subcomm within blocks of subcomms
|
||||
int crank;
|
||||
// Mpi uses the reverse Lexico convention to us; so reversed routines called
|
||||
Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
|
||||
Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids
|
||||
|
||||
MPI_Comm comm_split;
|
||||
if ( Nchild > 1 ) {
|
||||
|
||||
if(0){
|
||||
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
|
||||
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
|
||||
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processors[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
|
||||
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processor_coor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << scoor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Declare victory
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
|
||||
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
|
||||
std::cout << " Split communicator " <<comm_split <<std::endl;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Split the communicator
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
|
||||
assert(ierr==0);
|
||||
|
||||
} else {
|
||||
srank = 0;
|
||||
int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Set up from the new split communicator
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
InitFromMPICommunicator(processors,comm_split);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take the right SHM buffers
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
SetCommunicator(comm_split);
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////
|
||||
MPI_Comm_free(&comm_split);
|
||||
|
||||
if(0){
|
||||
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
|
||||
for(int d=0;d<processors.size();d++){
|
||||
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
|
||||
}
|
||||
}
|
||||
for(int d=0;d<processors.size();d++){
|
||||
assert(_processor_coor[d] == ccoor[d] );
|
||||
}
|
||||
}
|
||||
|
||||
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
|
||||
{
|
||||
////////////////////////////////////////////////////
|
||||
// Creates communicator, and the communicator_halo
|
||||
////////////////////////////////////////////////////
|
||||
_ndimension = processors.size();
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
||||
/////////////////////////////////
|
||||
// Count the requested nodes
|
||||
/////////////////////////////////
|
||||
_Nprocessors=1;
|
||||
_processors = processors;
|
||||
for(int i=0;i<_ndimension;i++){
|
||||
_Nprocessors*=_processors[i];
|
||||
}
|
||||
|
||||
std::vector<int> periodic(_ndimension,1);
|
||||
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
|
||||
MPI_Comm_rank(communicator,&_processor);
|
||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
||||
|
||||
if ( 0 && (communicator_base != communicator_world) ) {
|
||||
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
|
||||
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
|
||||
for(int d=0;d<_processors.size();d++){
|
||||
std::cout << _processor_coor[d]<<" ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
int Size;
|
||||
MPI_Comm_size(communicator,&Size);
|
||||
|
||||
communicator_halo.resize (2*_ndimension);
|
||||
for(int i=0;i<_ndimension*2;i++){
|
||||
MPI_Comm_dup(communicator,&communicator_halo[i]);
|
||||
}
|
||||
assert(Size==_Nprocessors);
|
||||
}
|
||||
|
||||
CartesianCommunicator::~CartesianCommunicator()
|
||||
{
|
||||
int MPI_is_finalised;
|
||||
MPI_Finalized(&MPI_is_finalised);
|
||||
if (communicator && !MPI_is_finalised) {
|
||||
MPI_Comm_free(&communicator);
|
||||
for(int i=0;i<communicator_halo.size();i++){
|
||||
MPI_Comm_free(&communicator_halo[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalXOR(uint32_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalXOR(uint64_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(float &f){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||
{
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(double &d)
|
||||
{
|
||||
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||
{
|
||||
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
// Basic Halo comms primitive
|
||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
std::vector<CommsRequest_t> reqs(0);
|
||||
// unsigned long xcrc = crc32(0L, Z_NULL, 0);
|
||||
// unsigned long rcrc = crc32(0L, Z_NULL, 0);
|
||||
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
|
||||
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||
SendToRecvFromComplete(reqs);
|
||||
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
|
||||
// printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
|
||||
}
|
||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int sender,
|
||||
int receiver,
|
||||
int bytes)
|
||||
{
|
||||
MPI_Status stat;
|
||||
assert(sender != receiver);
|
||||
int tag = sender;
|
||||
if ( _processor == sender ) {
|
||||
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||
}
|
||||
if ( _processor == receiver ) {
|
||||
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||
}
|
||||
}
|
||||
// Basic Halo comms primitive
|
||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
int myrank = _processor;
|
||||
int ierr;
|
||||
|
||||
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
|
||||
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
|
||||
assert(ierr==0);
|
||||
list.push_back(xrq);
|
||||
list.push_back(rrq);
|
||||
} else {
|
||||
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
||||
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
|
||||
recv,bytes,MPI_CHAR,from, from,
|
||||
communicator,MPI_STATUS_IGNORE);
|
||||
assert(ierr==0);
|
||||
}
|
||||
}
|
||||
|
||||
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes,int dir)
|
||||
{
|
||||
std::vector<CommsRequest_t> list;
|
||||
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
|
||||
StencilSendToRecvFromComplete(list,dir);
|
||||
return offbytes;
|
||||
}
|
||||
|
||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes,int dir)
|
||||
{
|
||||
int ncomm =communicator_halo.size();
|
||||
int commdir=dir%ncomm;
|
||||
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
|
||||
int ierr;
|
||||
int gdest = ShmRanks[dest];
|
||||
int gfrom = ShmRanks[from];
|
||||
int gme = ShmRanks[_processor];
|
||||
|
||||
assert(dest != _processor);
|
||||
assert(from != _processor);
|
||||
assert(gme == ShmRank);
|
||||
double off_node_bytes=0.0;
|
||||
|
||||
if ( gfrom ==MPI_UNDEFINED) {
|
||||
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(rrq);
|
||||
off_node_bytes+=bytes;
|
||||
}
|
||||
|
||||
if ( gdest == MPI_UNDEFINED ) {
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(xrq);
|
||||
off_node_bytes+=bytes;
|
||||
}
|
||||
|
||||
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||
this->StencilSendToRecvFromComplete(list,dir);
|
||||
}
|
||||
|
||||
return off_node_bytes;
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
MPI_Barrier (ShmComm);
|
||||
}
|
||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
int nreq=list.size();
|
||||
|
||||
if (nreq==0) return;
|
||||
|
||||
std::vector<MPI_Status> status(nreq);
|
||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||
assert(ierr==0);
|
||||
list.resize(0);
|
||||
}
|
||||
void CartesianCommunicator::Barrier(void)
|
||||
{
|
||||
int ierr = MPI_Barrier(communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
{
|
||||
int ierr=MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
root,
|
||||
communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
int CartesianCommunicator::RankWorld(void){
|
||||
int r;
|
||||
MPI_Comm_rank(communicator_world,&r);
|
||||
return r;
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
int ierr= MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
root,
|
||||
communicator_world);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
std::vector<int> row(_ndimension,1);
|
||||
assert(dim>=0 && dim<_ndimension);
|
||||
|
||||
// Split the communicator
|
||||
row[dim] = _processors[dim];
|
||||
|
||||
int me;
|
||||
CartesianCommunicator Comm(row,*this,me);
|
||||
Comm.AllToAll(in,out,words,bytes);
|
||||
}
|
||||
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
// MPI is a pain and uses "int" arguments
|
||||
// 64*64*64*128*16 == 500Million elements of data.
|
||||
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
|
||||
// (Turns up on 32^3 x 64 Gparity too)
|
||||
MPI_Datatype object;
|
||||
int iwords;
|
||||
int ibytes;
|
||||
iwords = words;
|
||||
ibytes = bytes;
|
||||
assert(words == iwords); // safe to cast to int ?
|
||||
assert(bytes == ibytes); // safe to cast to int ?
|
||||
MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
|
||||
MPI_Type_commit(&object);
|
||||
MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
|
||||
MPI_Type_free(&object);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,92 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/SharedMemory.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// static data
|
||||
|
||||
uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
|
||||
int GlobalSharedMemory::Hugepages = 0;
|
||||
int GlobalSharedMemory::_ShmSetup;
|
||||
int GlobalSharedMemory::_ShmAlloc;
|
||||
uint64_t GlobalSharedMemory::_ShmAllocBytes;
|
||||
|
||||
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
|
||||
|
||||
Grid_MPI_Comm GlobalSharedMemory::WorldShmComm;
|
||||
int GlobalSharedMemory::WorldShmRank;
|
||||
int GlobalSharedMemory::WorldShmSize;
|
||||
std::vector<int> GlobalSharedMemory::WorldShmRanks;
|
||||
|
||||
Grid_MPI_Comm GlobalSharedMemory::WorldComm;
|
||||
int GlobalSharedMemory::WorldSize;
|
||||
int GlobalSharedMemory::WorldRank;
|
||||
|
||||
int GlobalSharedMemory::WorldNodes;
|
||||
int GlobalSharedMemory::WorldNode;
|
||||
|
||||
void GlobalSharedMemory::SharedMemoryFree(void)
|
||||
{
|
||||
assert(_ShmAlloc);
|
||||
assert(_ShmAllocBytes>0);
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
munmap(WorldShmCommBufs[r],_ShmAllocBytes);
|
||||
}
|
||||
_ShmAlloc = 0;
|
||||
_ShmAllocBytes = 0;
|
||||
}
|
||||
/////////////////////////////////
|
||||
// Alloc, free shmem region
|
||||
/////////////////////////////////
|
||||
void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
||||
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
||||
void *ptr = (void *)heap_top;
|
||||
heap_top += bytes;
|
||||
heap_bytes+= bytes;
|
||||
if (heap_bytes >= heap_size) {
|
||||
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
|
||||
assert(heap_bytes<heap_size);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
void SharedMemory::ShmBufferFreeAll(void) {
|
||||
heap_top =(size_t)ShmBufferSelf();
|
||||
heap_bytes=0;
|
||||
}
|
||||
void *SharedMemory::ShmBufferSelf(void)
|
||||
{
|
||||
return ShmCommBufs[ShmRank];
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
@ -1,165 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/SharedMemory.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
|
||||
// TODO
|
||||
// 1) move includes into SharedMemory.cc
|
||||
//
|
||||
// 2) split shared memory into a) optimal communicator creation from comm world
|
||||
//
|
||||
// b) shared memory buffers container
|
||||
// -- static globally shared; init once
|
||||
// -- per instance set of buffers.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
#if defined (GRID_COMMS_MPI3)
|
||||
#include <mpi.h>
|
||||
#endif
|
||||
#include <semaphore.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#include <sys/mman.h>
|
||||
#include <zlib.h>
|
||||
#ifdef HAVE_NUMAIF_H
|
||||
#include <numaif.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
#if defined (GRID_COMMS_MPI3)
|
||||
typedef MPI_Comm Grid_MPI_Comm;
|
||||
typedef MPI_Request CommsRequest_t;
|
||||
#else
|
||||
typedef int CommsRequest_t;
|
||||
typedef int Grid_MPI_Comm;
|
||||
#endif
|
||||
|
||||
class GlobalSharedMemory {
|
||||
private:
|
||||
static const int MAXLOG2RANKSPERNODE = 16;
|
||||
|
||||
// Init once lock on the buffer allocation
|
||||
static int _ShmSetup;
|
||||
static int _ShmAlloc;
|
||||
static uint64_t _ShmAllocBytes;
|
||||
|
||||
public:
|
||||
static int ShmSetup(void) { return _ShmSetup; }
|
||||
static int ShmAlloc(void) { return _ShmAlloc; }
|
||||
static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; }
|
||||
static uint64_t MAX_MPI_SHM_BYTES;
|
||||
static int Hugepages;
|
||||
|
||||
static std::vector<void *> WorldShmCommBufs;
|
||||
|
||||
static Grid_MPI_Comm WorldComm;
|
||||
static int WorldRank;
|
||||
static int WorldSize;
|
||||
|
||||
static Grid_MPI_Comm WorldShmComm;
|
||||
static int WorldShmRank;
|
||||
static int WorldShmSize;
|
||||
|
||||
static int WorldNodes;
|
||||
static int WorldNode;
|
||||
|
||||
static std::vector<int> WorldShmRanks;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Create an optimal reordered communicator that makes MPI_Cart_create get it right
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
|
||||
static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
|
||||
///////////////////////////////////////////////////
|
||||
// Provide shared memory facilities off comm world
|
||||
///////////////////////////////////////////////////
|
||||
static void SharedMemoryAllocate(uint64_t bytes, int flags);
|
||||
static void SharedMemoryFree(void);
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////
|
||||
// one per communicator
|
||||
//////////////////////////////
|
||||
class SharedMemory
|
||||
{
|
||||
private:
|
||||
static const int MAXLOG2RANKSPERNODE = 16;
|
||||
|
||||
size_t heap_top;
|
||||
size_t heap_bytes;
|
||||
size_t heap_size;
|
||||
|
||||
protected:
|
||||
|
||||
Grid_MPI_Comm ShmComm; // for barriers
|
||||
int ShmRank;
|
||||
int ShmSize;
|
||||
std::vector<void *> ShmCommBufs;
|
||||
std::vector<int> ShmRanks;// Mapping comm ranks to Shm ranks
|
||||
|
||||
public:
|
||||
SharedMemory() {};
|
||||
~SharedMemory();
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// set the buffers & sizes
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void SetCommunicator(Grid_MPI_Comm comm);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// For this instance ; disjoint buffer sets between splits if split grid
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
void ShmBarrier(void);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Call on any instance
|
||||
///////////////////////////////////////////////////
|
||||
void SharedMemoryTest(void);
|
||||
void *ShmBufferSelf(void);
|
||||
void *ShmBuffer (int rank);
|
||||
void *ShmBufferTranslate(int rank,void * local_p);
|
||||
void *ShmBufferMalloc(size_t bytes);
|
||||
void ShmBufferFreeAll(void) ;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Make info on Nodes & ranks and Shared memory available
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
int NodeCount(void) { return GlobalSharedMemory::WorldNodes;};
|
||||
int RankCount(void) { return GlobalSharedMemory::WorldSize;};
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -1,651 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/SharedMemory.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <pwd.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/*Construct from an MPI communicator*/
|
||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||
{
|
||||
assert(_ShmSetup==0);
|
||||
WorldComm = comm;
|
||||
MPI_Comm_rank(WorldComm,&WorldRank);
|
||||
MPI_Comm_size(WorldComm,&WorldSize);
|
||||
// WorldComm, WorldSize, WorldRank
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Split into groups that can share memory
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm);
|
||||
MPI_Comm_rank(WorldShmComm ,&WorldShmRank);
|
||||
MPI_Comm_size(WorldShmComm ,&WorldShmSize);
|
||||
// WorldShmComm, WorldShmSize, WorldShmRank
|
||||
|
||||
// WorldNodes
|
||||
WorldNodes = WorldSize/WorldShmSize;
|
||||
assert( (WorldNodes * WorldShmSize) == WorldSize );
|
||||
|
||||
// FIXME: Check all WorldShmSize are the same ?
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// find world ranks in our SHM group (i.e. which ranks are on our node)
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Group WorldGroup, ShmGroup;
|
||||
MPI_Comm_group (WorldComm, &WorldGroup);
|
||||
MPI_Comm_group (WorldShmComm, &ShmGroup);
|
||||
|
||||
std::vector<int> world_ranks(WorldSize); for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
|
||||
|
||||
WorldShmRanks.resize(WorldSize);
|
||||
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &WorldShmRanks[0]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify who is in my group and nominate the leader
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int g=0;
|
||||
std::vector<int> MyGroup;
|
||||
MyGroup.resize(WorldShmSize);
|
||||
for(int rank=0;rank<WorldSize;rank++){
|
||||
if(WorldShmRanks[rank]!=MPI_UNDEFINED){
|
||||
assert(g<WorldShmSize);
|
||||
MyGroup[g++] = rank;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
|
||||
int myleader = MyGroup[0];
|
||||
|
||||
std::vector<int> leaders_1hot(WorldSize,0);
|
||||
std::vector<int> leaders_group(WorldNodes,0);
|
||||
leaders_1hot [ myleader ] = 1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// global sum leaders over comm world
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm);
|
||||
assert(ierr==0);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// find the group leaders world rank
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int group=0;
|
||||
for(int l=0;l<WorldSize;l++){
|
||||
if(leaders_1hot[l]){
|
||||
leaders_group[group++] = l;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify the node of the group in which I (and my leader) live
|
||||
///////////////////////////////////////////////////////////////////
|
||||
WorldNode=-1;
|
||||
for(int g=0;g<WorldNodes;g++){
|
||||
if (myleader == leaders_group[g]){
|
||||
WorldNode=g;
|
||||
}
|
||||
}
|
||||
assert(WorldNode!=-1);
|
||||
_ShmSetup=1;
|
||||
}
|
||||
// Gray encode support
|
||||
int BinaryToGray (int binary) {
|
||||
int gray = (binary>>1)^binary;
|
||||
return gray;
|
||||
}
|
||||
int Log2Size(int TwoToPower,int MAXLOG2)
|
||||
{
|
||||
int log2size = -1;
|
||||
for(int i=0;i<=MAXLOG2;i++){
|
||||
if ( (0x1<<i) == TwoToPower ) {
|
||||
log2size = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return log2size;
|
||||
}
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
{
|
||||
#ifdef HYPERCUBE
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify the hypercube coordinate of this node using hostname
|
||||
////////////////////////////////////////////////////////////////
|
||||
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
|
||||
// i runs 0..7 3 bits
|
||||
// r runs 0..3 2 bits
|
||||
// 2^10 = 1024 nodes
|
||||
const int maxhdim = 10;
|
||||
std::vector<int> HyperCubeCoords(maxhdim,0);
|
||||
std::vector<int> RootHyperCubeCoords(maxhdim,0);
|
||||
int R;
|
||||
int I;
|
||||
int N;
|
||||
const int namelen = _POSIX_HOST_NAME_MAX;
|
||||
char name[namelen];
|
||||
|
||||
// Parse ICE-XA hostname to get hypercube location
|
||||
gethostname(name,namelen);
|
||||
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
|
||||
assert(nscan==3);
|
||||
|
||||
int nlo = N%9;
|
||||
int nhi = N/9;
|
||||
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
|
||||
uint32_t rootcoor = hypercoor;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Print debug info
|
||||
//////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
std::string hname(name);
|
||||
std::cout << "hostname "<<hname<<std::endl;
|
||||
std::cout << "R " << R << " I " << I << " N "<< N
|
||||
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// broadcast node 0's base coordinate for this partition.
|
||||
//////////////////////////////////////////////////////////////////
|
||||
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
|
||||
hypercoor=hypercoor-rootcoor;
|
||||
assert(hypercoor<WorldSize);
|
||||
assert(hypercoor>=0);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Printing
|
||||
//////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify subblock of ranks on node spreading across dims
|
||||
// in a maximally symmetrical way
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ndimension = processors.size();
|
||||
std::vector<int> processor_coor(ndimension);
|
||||
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
|
||||
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
|
||||
std::vector<int> HyperCoor(ndimension);
|
||||
int dim = 0;
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%ndimension;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<ndimension;d++){
|
||||
NodeDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Map Hcube according to physical lattice
|
||||
// must partition. Loop over dims and find out who would join.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int hcoor = hypercoor;
|
||||
for(int d=0;d<ndimension;d++){
|
||||
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
|
||||
int msk = (0x1<<bits)-1;
|
||||
HyperCoor[d]=hcoor & msk;
|
||||
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
|
||||
hcoor = hcoor >> bits;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
////////////////////////////////////////////////////////////////
|
||||
int Nprocessors=1;
|
||||
for(int i=0;i<ndimension;i++){
|
||||
Nprocessors*=processors[i];
|
||||
}
|
||||
assert(WorldSize==Nprocessors);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
////////////////////////////////////////////////////////////////
|
||||
int rank;
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
|
||||
|
||||
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
|
||||
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
|
||||
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Build the new communicator
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify subblock of ranks on node spreading across dims
|
||||
// in a maximally symmetrical way
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ndimension = processors.size();
|
||||
std::vector<int> processor_coor(ndimension);
|
||||
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
|
||||
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
|
||||
int dim = 0;
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%ndimension;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<ndimension;d++){
|
||||
NodeDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
////////////////////////////////////////////////////////////////
|
||||
int Nprocessors=1;
|
||||
for(int i=0;i<ndimension;i++){
|
||||
Nprocessors*=processors[i];
|
||||
}
|
||||
assert(WorldSize==Nprocessors);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
////////////////////////////////////////////////////////////////
|
||||
int rank;
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
|
||||
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
|
||||
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
|
||||
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Build the new communicator
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#endif
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SHMGET
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMGET
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
std::vector<int> shmids(WorldShmSize);
|
||||
|
||||
if ( WorldShmRank == 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
size_t size = bytes;
|
||||
key_t key = IPC_PRIVATE;
|
||||
int flags = IPC_CREAT | SHM_R | SHM_W;
|
||||
#ifdef SHM_HUGETLB
|
||||
if (Hugepages) flags|=SHM_HUGETLB;
|
||||
#endif
|
||||
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
|
||||
int errsv = errno;
|
||||
printf("Errno %d\n",errsv);
|
||||
printf("key %d\n",key);
|
||||
printf("size %lld\n",size);
|
||||
printf("flags %d\n",flags);
|
||||
perror("shmget");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
|
||||
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
|
||||
perror("Shared memory attach failure");
|
||||
shmctl(shmids[r], IPC_RMID, NULL);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
///////////////////////////////////
|
||||
// Mark for clean up
|
||||
///////////////////////////////////
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMMMAP
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
||||
int fd=open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
if ( fd == -1) {
|
||||
printf("open %s failed\n",shm_name);
|
||||
perror("open hugetlbfs");
|
||||
exit(0);
|
||||
}
|
||||
int mmap_flag = MAP_SHARED ;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag|=MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||
if ( ptr == (void *)MAP_FAILED ) {
|
||||
printf("mmap %s failed\n",shm_name);
|
||||
perror("failed mmap"); assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
};
|
||||
#endif // MMAP
|
||||
|
||||
#ifdef GRID_MPI3_SHM_NONE
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
assert(WorldShmSize == 1);
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
int fd=-1;
|
||||
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag|=MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||
if ( ptr == (void *)MAP_FAILED ) {
|
||||
printf("mmap %s failed\n",shm_name);
|
||||
perror("failed mmap"); assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
};
|
||||
#endif // MMAP
|
||||
|
||||
#ifdef GRID_MPI3_SHMOPEN
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
|
||||
// tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for
|
||||
// the posix shm virtual file system
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
char shm_name [NAME_MAX];
|
||||
if ( WorldShmRank == 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
size_t size = bytes;
|
||||
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
shm_unlink(shm_name);
|
||||
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
|
||||
ftruncate(fd, size);
|
||||
|
||||
int mmap_flag = MAP_SHARED;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag |= MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if (flags) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
|
||||
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
|
||||
if ( ptr == (void * )MAP_FAILED ) {
|
||||
perror("failed mmap");
|
||||
assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
if ( WorldShmRank != 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
size_t size = bytes ;
|
||||
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
int fd=shm_open(shm_name,O_RDWR,0666);
|
||||
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
|
||||
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
////////////////////////////////////////////////////////
|
||||
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
{
|
||||
int rank, size;
|
||||
MPI_Comm_rank(comm,&rank);
|
||||
MPI_Comm_size(comm,&size);
|
||||
ShmRanks.resize(size);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Split into groups that can share memory
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
|
||||
MPI_Comm_rank(ShmComm ,&ShmRank);
|
||||
MPI_Comm_size(ShmComm ,&ShmSize);
|
||||
ShmCommBufs.resize(ShmSize);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Map ShmRank to WorldShmRank and use the right buffer
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
assert (GlobalSharedMemory::ShmAlloc()==1);
|
||||
heap_size = GlobalSharedMemory::ShmAllocBytes();
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
|
||||
uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
|
||||
|
||||
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
|
||||
|
||||
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
|
||||
// std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
|
||||
}
|
||||
ShmBufferFreeAll();
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// find comm ranks in our SHM group (i.e. which ranks are on our node)
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Group FullGroup, ShmGroup;
|
||||
MPI_Comm_group (comm , &FullGroup);
|
||||
MPI_Comm_group (ShmComm, &ShmGroup);
|
||||
|
||||
std::vector<int> ranks(size); for(int r=0;r<size;r++) ranks[r]=r;
|
||||
MPI_Group_translate_ranks (FullGroup,size,&ranks[0],ShmGroup, &ShmRanks[0]);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// On node barrier
|
||||
//////////////////////////////////////////////////////////////////
|
||||
void SharedMemory::ShmBarrier(void)
|
||||
{
|
||||
MPI_Barrier (ShmComm);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Test the shared memory is working
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void SharedMemory::SharedMemoryTest(void)
|
||||
{
|
||||
ShmBarrier();
|
||||
if ( ShmRank == 0 ) {
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
uint64_t * check = (uint64_t *) ShmCommBufs[r];
|
||||
check[0] = GlobalSharedMemory::WorldNode;
|
||||
check[1] = r;
|
||||
check[2] = 0x5A5A5A;
|
||||
}
|
||||
}
|
||||
ShmBarrier();
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
uint64_t * check = (uint64_t *) ShmCommBufs[r];
|
||||
|
||||
assert(check[0]==GlobalSharedMemory::WorldNode);
|
||||
assert(check[1]==r);
|
||||
assert(check[2]==0x5A5A5A);
|
||||
|
||||
}
|
||||
ShmBarrier();
|
||||
}
|
||||
|
||||
void *SharedMemory::ShmBuffer(int rank)
|
||||
{
|
||||
int gpeer = ShmRanks[rank];
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
return ShmCommBufs[gpeer];
|
||||
}
|
||||
}
|
||||
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
static int count =0;
|
||||
int gpeer = ShmRanks[rank];
|
||||
assert(gpeer!=ShmRank); // never send to self
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
|
||||
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
|
||||
return (void *) remote;
|
||||
}
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{
|
||||
int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised);
|
||||
if ( !MPI_is_finalised ) {
|
||||
MPI_Comm_free(&ShmComm);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -1,128 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/SharedMemory.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/*Construct from an MPI communicator*/
|
||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||
{
|
||||
assert(_ShmSetup==0);
|
||||
WorldComm = 0;
|
||||
WorldRank = 0;
|
||||
WorldSize = 1;
|
||||
WorldShmComm = 0 ;
|
||||
WorldShmRank = 0 ;
|
||||
WorldShmSize = 1 ;
|
||||
WorldNodes = 1 ;
|
||||
WorldNode = 0 ;
|
||||
WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
|
||||
WorldShmCommBufs.resize(1);
|
||||
_ShmSetup=1;
|
||||
}
|
||||
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
{
|
||||
optimal_comm = WorldComm;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended, use anonymous mmap
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
void * ShmCommBuf ;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
int mmap_flag =0;
|
||||
#ifdef MAP_ANONYMOUS
|
||||
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
|
||||
#endif
|
||||
#ifdef MAP_ANON
|
||||
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
|
||||
if (ShmCommBuf == (void *)MAP_FAILED) {
|
||||
perror("mmap failed ");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
#ifdef MADV_HUGEPAGE
|
||||
if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE);
|
||||
#endif
|
||||
bzero(ShmCommBuf,bytes);
|
||||
WorldShmCommBufs[0] = ShmCommBuf;
|
||||
_ShmAllocBytes=bytes;
|
||||
_ShmAlloc=1;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
////////////////////////////////////////////////////////
|
||||
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
{
|
||||
assert(GlobalSharedMemory::ShmAlloc()==1);
|
||||
ShmRanks.resize(1);
|
||||
ShmCommBufs.resize(1);
|
||||
ShmRanks[0] = 0;
|
||||
ShmRank = 0;
|
||||
ShmSize = 1;
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Map ShmRank to WorldShmRank and use the right buffer
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
|
||||
heap_size = GlobalSharedMemory::ShmAllocBytes();
|
||||
ShmBufferFreeAll();
|
||||
return;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// On node barrier
|
||||
//////////////////////////////////////////////////////////////////
|
||||
void SharedMemory::ShmBarrier(void){ return ; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Test the shared memory is working
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void SharedMemory::SharedMemoryTest(void) { return; }
|
||||
|
||||
void *SharedMemory::ShmBuffer(int rank)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{};
|
||||
|
||||
}
|
18920
Grid/json/json.hpp
18920
Grid/json/json.hpp
File diff suppressed because it is too large
Load Diff
@ -1,733 +0,0 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: ./lib/lattice/Lattice_reduction.h
|
||||
Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REDUCTION_H
|
||||
#define GRID_LATTICE_REDUCTION_H
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
|
||||
namespace Grid {
|
||||
#ifdef GRID_WARN_SUBOPTIMAL
|
||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Deterministic Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||
auto nrm = innerProduct(arg,arg);
|
||||
return std::real(nrm);
|
||||
}
|
||||
|
||||
// Double inner product
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
GridBase *grid = left._grid;
|
||||
const int pad = 8;
|
||||
|
||||
ComplexD inner;
|
||||
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
|
||||
}
|
||||
// All threads sum across SIMD; reduce serial work at end
|
||||
// one write per cacheline with streaming store
|
||||
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
|
||||
vstream(sumarray[thr*pad],tmp);
|
||||
}
|
||||
|
||||
inner=0.0;
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
inner = inner+sumarray[i*pad];
|
||||
}
|
||||
right._grid->GlobalSum(inner);
|
||||
return inner;
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// Fast axpby_norm
|
||||
// z = a x + b y
|
||||
// return norm z
|
||||
/////////////////////////
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
sobj one(1.0);
|
||||
return axpby_norm_fast(z,a,one,x,y);
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
const int pad = 8;
|
||||
z.checkerboard = x.checkerboard;
|
||||
conformable(z,x);
|
||||
conformable(x,y);
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
RealD nrm;
|
||||
|
||||
GridBase *grid = x._grid;
|
||||
|
||||
Vector<RealD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
// private to thread; sub summation
|
||||
decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vnrm = vnrm + innerProductD(tmp,tmp);
|
||||
vstream(z._odata[ss],tmp);
|
||||
}
|
||||
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
|
||||
}
|
||||
|
||||
nrm = 0.0; // sum across threads; linear in thread count but fast
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
nrm = nrm+sumarray[i*pad];
|
||||
}
|
||||
z._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class Op,class T1,class T2>
|
||||
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1,class T2,class T3>
|
||||
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
||||
eval(0,std::get<1>(expr.second)),
|
||||
eval(0,std::get<2>(expr.second))
|
||||
))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||
{
|
||||
GridBase *grid=arg._grid;
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
sumarray[i]=zero;
|
||||
}
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
vobj vvsum=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vvsum = vvsum + arg._odata[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
}
|
||||
|
||||
vobj vsum=zero; // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vsum = vsum+sumarray[i];
|
||||
}
|
||||
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj ssum=zero;
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
extract(vsum,buf);
|
||||
|
||||
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
|
||||
arg._grid->GlobalSum(ssum);
|
||||
|
||||
return ssum;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc...
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
|
||||
{
|
||||
///////////////////////////////////////////////////////
|
||||
// FIXME precision promoted summation
|
||||
// may be important for correlation functions
|
||||
// But easily avoided by using double precision fields
|
||||
///////////////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase *grid = Data._grid;
|
||||
assert(grid!=NULL);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
|
||||
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
|
||||
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
// sum over reduced dimension planes, breaking out orthog dir
|
||||
// Parallel over orthog direction
|
||||
parallel_for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
lvSum[r]=lvSum[r]+Data._odata[ss];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
extract(lvSum[rt],extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx];
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
sobj gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=zero;
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
std::vector<scalar_type> lsSum;
|
||||
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
|
||||
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
|
||||
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start prep" << std::endl;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs._grid);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
// std::cout << GridLogMessage << "Start alloc" << std::endl;
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
|
||||
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
|
||||
// std::cout << GridLogMessage << "End alloc" << std::endl;
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
// std::cout << GridLogMessage << "End prep" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
|
||||
vector_type vv;
|
||||
parallel_for(int r=0;r<rd;r++)
|
||||
{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss = so + n * stride + b;
|
||||
vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
|
||||
lvSum[r] = lvSum[r] + vv;
|
||||
}
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
|
||||
}
|
||||
template <class vobj>
|
||||
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
int fd = result.size();
|
||||
int ld = lsSum.size();
|
||||
// sum over nodes.
|
||||
std::vector<scalar_type> gsum;
|
||||
gsum.resize(fd, scalar_type(0.0));
|
||||
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum[t]=lsSum[lt];
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
|
||||
grid->GlobalSumVector(&gsum[0], fd);
|
||||
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
|
||||
|
||||
result = gsum;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs._grid);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
|
||||
std::vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
std::vector<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
parallel_for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
vector_type vv = TensorRemove(innerProduct(lhs._odata[ss],rhs._odata[ss]));
|
||||
lvSum[r]=lvSum[r]+vv;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
scalar_type gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=scalar_type(0.0);
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = rhs._grid->GlobalDimensions()[Orthog];
|
||||
std::vector<ComplexD> ip(Nblock);
|
||||
sn.resize(Nblock);
|
||||
|
||||
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
sn[ss] = real(ip[ss]);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
|
||||
int orthogdim,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::tensor_reduced tensor_reduced;
|
||||
|
||||
scalar_type zscale(scale);
|
||||
|
||||
GridBase *grid = X._grid;
|
||||
|
||||
int Nsimd =grid->Nsimd();
|
||||
int Nblock =grid->GlobalDimensions()[orthogdim];
|
||||
|
||||
int fd =grid->_fdimensions[orthogdim];
|
||||
int ld =grid->_ldimensions[orthogdim];
|
||||
int rd =grid->_rdimensions[orthogdim];
|
||||
|
||||
int e1 =grid->_slice_nblock[orthogdim];
|
||||
int e2 =grid->_slice_block [orthogdim];
|
||||
int stride =grid->_slice_stride[orthogdim];
|
||||
|
||||
std::vector<int> icoor;
|
||||
|
||||
for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
vector_type av;
|
||||
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
grid->iCoorFromIindex(icoor,l);
|
||||
int ldx =r+icoor[orthogdim]*rd;
|
||||
scalar_type *as =(scalar_type *)&av;
|
||||
as[l] = scalar_type(a[ldx])*zscale;
|
||||
}
|
||||
|
||||
tensor_reduced at; at=av;
|
||||
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
R._odata[ss] = at*X._odata[ss]+Y._odata[ss];
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
|
||||
{
|
||||
int NN = BlockSolverGrid->_ndimension;
|
||||
int nsimd = BlockSolverGrid->Nsimd();
|
||||
|
||||
std::vector<int> latt_phys(0);
|
||||
std::vector<int> simd_phys(0);
|
||||
std::vector<int> mpi_phys(0);
|
||||
|
||||
for(int d=0;d<NN;d++){
|
||||
if( d!=Orthog ) {
|
||||
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
|
||||
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
|
||||
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
|
||||
}
|
||||
}
|
||||
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
|
||||
}
|
||||
*/
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}}
|
||||
}
|
||||
};
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl=1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = s_x[0]*(scale*aa(0,i));
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
// Lattice<vobj> Lslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> Left(Nblock);
|
||||
std::vector<vobj> Right(Nblock);
|
||||
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs[o+i*ostride];
|
||||
Right[i] = rhs[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
auto tmp = innerProduct(Left[i],Right[j]);
|
||||
auto rtmp = TensorRemove(tmp);
|
||||
mat_thread(i,j) += Reduce(rtmp);
|
||||
}}
|
||||
}}
|
||||
#pragma omp critical
|
||||
{
|
||||
mat += mat_thread;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ComplexD sum = mat(i,j);
|
||||
FullGrid->GlobalSum(sum);
|
||||
mat(i,j)=sum;
|
||||
}}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
} /*END NAMESPACE GRID*/
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1,729 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/BinaryIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu<guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_BINARY_IO_H
|
||||
#define GRID_BINARY_IO_H
|
||||
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT)
|
||||
#define USE_MPI_IO
|
||||
#else
|
||||
#undef USE_MPI_IO
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_ENDIAN_H
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Byte reversal garbage
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
inline uint32_t byte_reverse32(uint32_t f) {
|
||||
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
return f;
|
||||
}
|
||||
inline uint64_t byte_reverse64(uint64_t f) {
|
||||
uint64_t g;
|
||||
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
g = g << 32;
|
||||
f = f >> 32;
|
||||
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
return g;
|
||||
}
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
inline uint64_t Grid_ntohll(uint64_t A) { return A; }
|
||||
#else
|
||||
inline uint64_t Grid_ntohll(uint64_t A) {
|
||||
return byte_reverse64(A);
|
||||
}
|
||||
#endif
|
||||
|
||||
// A little helper
|
||||
inline void removeWhitespace(std::string &key)
|
||||
{
|
||||
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Static class holding the parallel IO code
|
||||
// Could just use a namespace
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BinaryIO {
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// more byte manipulation helpers
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vobj> static inline void Uint32Checksum(Lattice<vobj> &lat,uint32_t &nersc_csum)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
GridBase *grid = lat._grid;
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
unvectorizeToLexOrdArray(scalardata,lat);
|
||||
|
||||
NerscChecksum(grid,scalardata,nersc_csum);
|
||||
}
|
||||
|
||||
template <class fobj>
|
||||
static inline void NerscChecksum(GridBase *grid, std::vector<fobj> &fbuf, uint32_t &nersc_csum)
|
||||
{
|
||||
const uint64_t size32 = sizeof(fobj) / sizeof(uint32_t);
|
||||
|
||||
uint64_t lsites = grid->lSites();
|
||||
if (fbuf.size() == 1)
|
||||
{
|
||||
lsites = 1;
|
||||
}
|
||||
|
||||
PARALLEL_REGION
|
||||
{
|
||||
uint32_t nersc_csum_thr = 0;
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for (uint64_t local_site = 0; local_site < lsites; local_site++)
|
||||
{
|
||||
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
|
||||
for (uint64_t j = 0; j < size32; j++)
|
||||
{
|
||||
nersc_csum_thr = nersc_csum_thr + site_buf[j];
|
||||
}
|
||||
}
|
||||
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
nersc_csum += nersc_csum_thr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class fobj> static inline void ScidacChecksum(GridBase *grid,std::vector<fobj> &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb)
|
||||
{
|
||||
const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t);
|
||||
|
||||
|
||||
int nd = grid->_ndimension;
|
||||
|
||||
uint64_t lsites =grid->lSites();
|
||||
if (fbuf.size()==1) {
|
||||
lsites=1;
|
||||
}
|
||||
std::vector<int> local_vol =grid->LocalDimensions();
|
||||
std::vector<int> local_start =grid->LocalStarts();
|
||||
std::vector<int> global_vol =grid->FullDimensions();
|
||||
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> coor(nd);
|
||||
uint32_t scidac_csuma_thr=0;
|
||||
uint32_t scidac_csumb_thr=0;
|
||||
uint32_t site_crc=0;
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(uint64_t local_site=0;local_site<lsites;local_site++){
|
||||
|
||||
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
|
||||
|
||||
/*
|
||||
* Scidac csum is rather more heavyweight
|
||||
* FIXME -- 128^3 x 256 x 16 will overflow.
|
||||
*/
|
||||
|
||||
int global_site;
|
||||
|
||||
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
|
||||
|
||||
for(int d=0;d<nd;d++) {
|
||||
coor[d] = coor[d]+local_start[d];
|
||||
}
|
||||
|
||||
Lexicographic::IndexFromCoor(coor,global_site,global_vol);
|
||||
|
||||
uint32_t gsite29 = global_site%29;
|
||||
uint32_t gsite31 = global_site%31;
|
||||
|
||||
site_crc = crc32(0,(unsigned char *)site_buf,sizeof(fobj));
|
||||
// std::cout << "Site "<<local_site << " crc "<<std::hex<<site_crc<<std::dec<<std::endl;
|
||||
// std::cout << "Site "<<local_site << std::hex<<site_buf[0] <<site_buf[1]<<std::dec <<std::endl;
|
||||
scidac_csuma_thr ^= site_crc<<gsite29 | site_crc>>(32-gsite29);
|
||||
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
|
||||
}
|
||||
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
scidac_csuma^= scidac_csuma_thr;
|
||||
scidac_csumb^= scidac_csumb_thr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Network is big endian
|
||||
static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
|
||||
static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
|
||||
static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
|
||||
static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}
|
||||
|
||||
static inline void be32toh_v(void *file_object,uint64_t bytes)
|
||||
{
|
||||
uint32_t * f = (uint32_t *)file_object;
|
||||
uint64_t count = bytes/sizeof(uint32_t);
|
||||
parallel_for(uint64_t i=0;i<count;i++){
|
||||
f[i] = ntohl(f[i]);
|
||||
}
|
||||
}
|
||||
// LE must Swap and switch to host
|
||||
static inline void le32toh_v(void *file_object,uint64_t bytes)
|
||||
{
|
||||
uint32_t *fp = (uint32_t *)file_object;
|
||||
uint32_t f;
|
||||
|
||||
uint64_t count = bytes/sizeof(uint32_t);
|
||||
parallel_for(uint64_t i=0;i<count;i++){
|
||||
f = fp[i];
|
||||
// got network order and the network to host
|
||||
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
fp[i] = ntohl(f);
|
||||
}
|
||||
}
|
||||
|
||||
// BE is same as network
|
||||
static inline void be64toh_v(void *file_object,uint64_t bytes)
|
||||
{
|
||||
uint64_t * f = (uint64_t *)file_object;
|
||||
uint64_t count = bytes/sizeof(uint64_t);
|
||||
parallel_for(uint64_t i=0;i<count;i++){
|
||||
f[i] = Grid_ntohll(f[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// LE must swap and switch;
|
||||
static inline void le64toh_v(void *file_object,uint64_t bytes)
|
||||
{
|
||||
uint64_t *fp = (uint64_t *)file_object;
|
||||
uint64_t f,g;
|
||||
|
||||
uint64_t count = bytes/sizeof(uint64_t);
|
||||
parallel_for(uint64_t i=0;i<count;i++){
|
||||
f = fp[i];
|
||||
// got network order and the network to host
|
||||
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
g = g << 32;
|
||||
f = f >> 32;
|
||||
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
fp[i] = Grid_ntohll(g);
|
||||
}
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Real action:
|
||||
// Read or Write distributed lexico array of ANY object to a specific location in file
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static const int BINARYIO_MASTER_APPEND = 0x10;
|
||||
static const int BINARYIO_UNORDERED = 0x08;
|
||||
static const int BINARYIO_LEXICOGRAPHIC = 0x04;
|
||||
static const int BINARYIO_READ = 0x02;
|
||||
static const int BINARYIO_WRITE = 0x01;
|
||||
|
||||
template<class word,class fobj>
|
||||
static inline void IOobject(word w,
|
||||
GridBase *grid,
|
||||
std::vector<fobj> &iodata,
|
||||
std::string file,
|
||||
uint64_t& offset,
|
||||
const std::string &format, int control,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
grid->Barrier();
|
||||
GridStopWatch timer;
|
||||
GridStopWatch bstimer;
|
||||
|
||||
nersc_csum=0;
|
||||
scidac_csuma=0;
|
||||
scidac_csumb=0;
|
||||
|
||||
int ndim = grid->Dimensions();
|
||||
int nrank = grid->ProcessorCount();
|
||||
int myrank = grid->ThisRank();
|
||||
|
||||
std::vector<int> psizes = grid->ProcessorGrid();
|
||||
std::vector<int> pcoor = grid->ThisProcessorCoor();
|
||||
std::vector<int> gLattice= grid->GlobalDimensions();
|
||||
std::vector<int> lLattice= grid->LocalDimensions();
|
||||
|
||||
std::vector<int> lStart(ndim);
|
||||
std::vector<int> gStart(ndim);
|
||||
|
||||
// Flatten the file
|
||||
uint64_t lsites = grid->lSites();
|
||||
if ( control & BINARYIO_MASTER_APPEND ) {
|
||||
assert(iodata.size()==1);
|
||||
} else {
|
||||
assert(lsites==iodata.size());
|
||||
}
|
||||
for(int d=0;d<ndim;d++){
|
||||
gStart[d] = lLattice[d]*pcoor[d];
|
||||
lStart[d] = 0;
|
||||
}
|
||||
|
||||
#ifdef USE_MPI_IO
|
||||
std::vector<int> distribs(ndim,MPI_DISTRIBUTE_BLOCK);
|
||||
std::vector<int> dargs (ndim,MPI_DISTRIBUTE_DFLT_DARG);
|
||||
MPI_Datatype mpiObject;
|
||||
MPI_Datatype fileArray;
|
||||
MPI_Datatype localArray;
|
||||
MPI_Datatype mpiword;
|
||||
MPI_Offset disp = offset;
|
||||
MPI_File fh ;
|
||||
MPI_Status status;
|
||||
int numword;
|
||||
|
||||
if ( sizeof( word ) == sizeof(float ) ) {
|
||||
numword = sizeof(fobj)/sizeof(float);
|
||||
mpiword = MPI_FLOAT;
|
||||
} else {
|
||||
numword = sizeof(fobj)/sizeof(double);
|
||||
mpiword = MPI_DOUBLE;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Sobj in MPI phrasing
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
int ierr;
|
||||
ierr = MPI_Type_contiguous(numword,mpiword,&mpiObject); assert(ierr==0);
|
||||
ierr = MPI_Type_commit(&mpiObject);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// File global array data type
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
ierr=MPI_Type_create_subarray(ndim,&gLattice[0],&lLattice[0],&gStart[0],MPI_ORDER_FORTRAN, mpiObject,&fileArray); assert(ierr==0);
|
||||
ierr=MPI_Type_commit(&fileArray); assert(ierr==0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// local lattice array
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
ierr=MPI_Type_create_subarray(ndim,&lLattice[0],&lLattice[0],&lStart[0],MPI_ORDER_FORTRAN, mpiObject,&localArray); assert(ierr==0);
|
||||
ierr=MPI_Type_commit(&localArray); assert(ierr==0);
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Byte order
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Do the I/O
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
if ( control & BINARYIO_READ ) {
|
||||
|
||||
timer.Start();
|
||||
|
||||
if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
|
||||
#ifdef USE_MPI_IO
|
||||
std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl;
|
||||
ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0);
|
||||
ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0);
|
||||
ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0);
|
||||
MPI_File_close(&fh);
|
||||
MPI_Type_free(&fileArray);
|
||||
MPI_Type_free(&localArray);
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
} else {
|
||||
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
std::ifstream fin;
|
||||
fin.open(file, std::ios::binary | std::ios::in);
|
||||
if (control & BINARYIO_MASTER_APPEND)
|
||||
{
|
||||
fin.seekg(-sizeof(fobj), fin.end);
|
||||
}
|
||||
else
|
||||
{
|
||||
fin.seekg(offset + myrank * lsites * sizeof(fobj));
|
||||
}
|
||||
fin.read((char *)&iodata[0], iodata.size() * sizeof(fobj));
|
||||
assert(fin.fail() == 0);
|
||||
fin.close();
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
bstimer.Start();
|
||||
ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
|
||||
if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
NerscChecksum(grid,iodata,nersc_csum);
|
||||
bstimer.Stop();
|
||||
}
|
||||
|
||||
if ( control & BINARYIO_WRITE ) {
|
||||
|
||||
bstimer.Start();
|
||||
NerscChecksum(grid,iodata,nersc_csum);
|
||||
if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
|
||||
bstimer.Stop();
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
timer.Start();
|
||||
if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
|
||||
#ifdef USE_MPI_IO
|
||||
std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl;
|
||||
ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
|
||||
// std::cout << GridLogMessage << "Checking for errors" << std::endl;
|
||||
if (ierr != MPI_SUCCESS)
|
||||
{
|
||||
char error_string[BUFSIZ];
|
||||
int length_of_error_string, error_class;
|
||||
|
||||
MPI_Error_class(ierr, &error_class);
|
||||
MPI_Error_string(error_class, error_string, &length_of_error_string);
|
||||
fprintf(stderr, "%3d: %s\n", myrank, error_string);
|
||||
MPI_Error_string(ierr, error_string, &length_of_error_string);
|
||||
fprintf(stderr, "%3d: %s\n", myrank, error_string);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
|
||||
}
|
||||
|
||||
std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
|
||||
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
|
||||
assert(ierr == 0);
|
||||
|
||||
std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
|
||||
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
|
||||
assert(ierr == 0);
|
||||
|
||||
MPI_Offset os;
|
||||
MPI_File_get_position(fh, &os);
|
||||
MPI_File_get_byte_offset(fh, os, &disp);
|
||||
offset = disp;
|
||||
|
||||
|
||||
MPI_File_close(&fh);
|
||||
MPI_Type_free(&fileArray);
|
||||
MPI_Type_free(&localArray);
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
} else {
|
||||
|
||||
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
|
||||
|
||||
std::ofstream fout;
|
||||
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
|
||||
try {
|
||||
if (offset) { // Must already exist and contain data
|
||||
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
|
||||
} else { // Allow create
|
||||
fout.open(file,std::ios::binary|std::ios::out);
|
||||
}
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
|
||||
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
|
||||
// std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
|
||||
#ifdef USE_MPI_IO
|
||||
MPI_Abort(MPI_COMM_WORLD,1);
|
||||
#else
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
if ( control & BINARYIO_MASTER_APPEND ) {
|
||||
try {
|
||||
fout.seekp(0,fout.end);
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << "Exception in seeking file end " << file << std::endl;
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
fout.seekp(offset+myrank*lsites*sizeof(fobj));
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << "Exception in seeking file " << file <<" offset "<< offset << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0);
|
||||
}
|
||||
catch (const std::fstream::failure& exc) {
|
||||
std::cout << "Exception in writing file " << file << std::endl;
|
||||
std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl;
|
||||
#ifdef USE_MPI_IO
|
||||
MPI_Abort(MPI_COMM_WORLD,1);
|
||||
#else
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
offset = fout.tellp();
|
||||
fout.close();
|
||||
}
|
||||
timer.Stop();
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"IOobject: ";
|
||||
if ( control & BINARYIO_READ) std::cout << " read ";
|
||||
else std::cout << " write ";
|
||||
uint64_t bytes = sizeof(fobj)*iodata.size()*nrank;
|
||||
std::cout<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||
<< (double)bytes/ (double)timer.useconds() <<" MB/s "<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed() <<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Safety check
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// if the data size is 1 we do not want to sum over the MPI ranks
|
||||
if (iodata.size() != 1){
|
||||
grid->Barrier();
|
||||
grid->GlobalSum(nersc_csum);
|
||||
grid->GlobalXOR(scidac_csuma);
|
||||
grid->GlobalXOR(scidac_csumb);
|
||||
grid->Barrier();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Read a Lattice of object
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class fobj,class munger>
|
||||
static inline void readLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
|
||||
parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
|
||||
|
||||
vectorizeFromLexOrdArray(scalardata,Umu);
|
||||
grid->Barrier();
|
||||
|
||||
timer.Stop();
|
||||
std::cout<<GridLogMessage<<"readLatticeObject: vectorize overhead "<<timer.Elapsed() <<std::endl;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Write a Lattice of object
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class fobj,class munger>
|
||||
static inline void writeLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
GridBase *grid = Umu._grid;
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Munge [ .e.g 3rd row recon ]
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
GridStopWatch timer; timer.Start();
|
||||
unvectorizeToLexOrdArray(scalardata,Umu);
|
||||
|
||||
parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
|
||||
|
||||
grid->Barrier();
|
||||
timer.Stop();
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Read a RNG; use IOobject and lexico map to an array of state
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
static inline void readRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
typedef typename GridSerialRNG::RngStateType RngStateType;
|
||||
const int RngStateCount = GridSerialRNG::RngStateCount;
|
||||
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
||||
typedef RngStateType word; word w=0;
|
||||
|
||||
std::string format = "IEEE32BIG";
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp = 0;
|
||||
uint32_t scidac_csuma_tmp = 0;
|
||||
uint32_t scidac_csumb_tmp = 0;
|
||||
|
||||
GridStopWatch timer;
|
||||
|
||||
std::cout << GridLogMessage << "RNG read I/O on file " << file << std::endl;
|
||||
|
||||
std::vector<RNGstate> iodata(lsites);
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
timer.Start();
|
||||
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
|
||||
parallel.SetState(tmp,lidx);
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
iodata.resize(1);
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_MASTER_APPEND,
|
||||
nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
|
||||
|
||||
{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin());
|
||||
serial.SetState(tmp,0);
|
||||
}
|
||||
|
||||
nersc_csum = nersc_csum + nersc_csum_tmp;
|
||||
scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
|
||||
scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
|
||||
|
||||
std::cout << GridLogMessage << "RNG file nersc_checksum " << std::hex << nersc_csum << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Write a RNG; lexico map to an array of state and use IOobject
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
static inline void writeRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
typedef typename GridSerialRNG::RngStateType RngStateType;
|
||||
typedef RngStateType word; word w=0;
|
||||
const int RngStateCount = GridSerialRNG::RngStateCount;
|
||||
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp;
|
||||
uint32_t scidac_csuma_tmp;
|
||||
uint32_t scidac_csumb_tmp;
|
||||
|
||||
GridStopWatch timer;
|
||||
std::string format = "IEEE32BIG";
|
||||
|
||||
std::cout << GridLogMessage << "RNG write I/O on file " << file << std::endl;
|
||||
|
||||
timer.Start();
|
||||
std::vector<RNGstate> iodata(lsites);
|
||||
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
parallel.GetState(tmp,lidx);
|
||||
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
iodata.resize(1);
|
||||
{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
serial.GetState(tmp,0);
|
||||
std::copy(tmp.begin(),tmp.end(),iodata[0].begin());
|
||||
}
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND,
|
||||
nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
|
||||
|
||||
nersc_csum = nersc_csum + nersc_csum_tmp;
|
||||
scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
|
||||
scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
|
||||
|
||||
std::cout << GridLogMessage << "RNG file checksum " << std::hex << nersc_csum << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG file checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG file checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,876 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/IldgIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ILDG_IO_H
|
||||
#define GRID_ILDG_IO_H
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
|
||||
#include <pwd.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <unistd.h>
|
||||
|
||||
//C-Lime is a must have for this functionality
|
||||
extern "C" {
|
||||
#include "lime.h"
|
||||
}
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/////////////////////////////////
|
||||
// Encode word types as strings
|
||||
/////////////////////////////////
|
||||
template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); }
|
||||
template<> inline std::string ScidacWordMnemonic<double> (void){ return std::string("D"); }
|
||||
template<> inline std::string ScidacWordMnemonic<float> (void){ return std::string("F"); }
|
||||
template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); }
|
||||
|
||||
/////////////////////////////////////////
|
||||
// Encode a generic tensor as a string
|
||||
/////////////////////////////////////////
|
||||
template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {
|
||||
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
|
||||
int _ColourN = indexRank<ColourIndex,vobj>();
|
||||
int _ColourScalar = isScalar<ColourIndex,vobj>();
|
||||
int _ColourVector = isVector<ColourIndex,vobj>();
|
||||
int _ColourMatrix = isMatrix<ColourIndex,vobj>();
|
||||
|
||||
int _SpinN = indexRank<SpinIndex,vobj>();
|
||||
int _SpinScalar = isScalar<SpinIndex,vobj>();
|
||||
int _SpinVector = isVector<SpinIndex,vobj>();
|
||||
int _SpinMatrix = isMatrix<SpinIndex,vobj>();
|
||||
|
||||
int _LorentzN = indexRank<LorentzIndex,vobj>();
|
||||
int _LorentzScalar = isScalar<LorentzIndex,vobj>();
|
||||
int _LorentzVector = isVector<LorentzIndex,vobj>();
|
||||
int _LorentzMatrix = isMatrix<LorentzIndex,vobj>();
|
||||
|
||||
std::stringstream stream;
|
||||
|
||||
stream << "GRID_";
|
||||
stream << ScidacWordMnemonic<stype>();
|
||||
|
||||
if ( _LorentzVector ) stream << "_LorentzVector"<<_LorentzN;
|
||||
if ( _LorentzMatrix ) stream << "_LorentzMatrix"<<_LorentzN;
|
||||
|
||||
if ( _SpinVector ) stream << "_SpinVector"<<_SpinN;
|
||||
if ( _SpinMatrix ) stream << "_SpinMatrix"<<_SpinN;
|
||||
|
||||
if ( _ColourVector ) stream << "_ColourVector"<<_ColourN;
|
||||
if ( _ColourMatrix ) stream << "_ColourMatrix"<<_ColourN;
|
||||
|
||||
if ( _ColourScalar && _LorentzScalar && _SpinScalar ) stream << "_Complex";
|
||||
|
||||
|
||||
typesize = sizeof(typename vobj::scalar_type);
|
||||
|
||||
if ( _ColourMatrix ) typesize*= _ColourN*_ColourN;
|
||||
else typesize*= _ColourN;
|
||||
|
||||
if ( _SpinMatrix ) typesize*= _SpinN*_SpinN;
|
||||
else typesize*= _SpinN;
|
||||
|
||||
colors = _ColourN;
|
||||
spins = _SpinN;
|
||||
datacount = _LorentzN;
|
||||
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {
|
||||
return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount);
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Helper to fill out metadata
|
||||
////////////////////////////////////////////////////////////
|
||||
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
|
||||
FieldMetaData &header,
|
||||
scidacRecord & _scidacRecord,
|
||||
scidacFile & _scidacFile)
|
||||
{
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
|
||||
/////////////////////////////////////
|
||||
// Pull Grid's metadata
|
||||
/////////////////////////////////////
|
||||
PrepareMetaData(field,header);
|
||||
|
||||
/////////////////////////////////////
|
||||
// Scidac Private File structure
|
||||
/////////////////////////////////////
|
||||
_scidacFile = scidacFile(field._grid);
|
||||
|
||||
/////////////////////////////////////
|
||||
// Scidac Private Record structure
|
||||
/////////////////////////////////////
|
||||
scidacRecord sr;
|
||||
sr.datatype = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount);
|
||||
sr.date = header.creation_date;
|
||||
sr.precision = ScidacWordMnemonic<stype>();
|
||||
sr.recordtype = GRID_IO_FIELD;
|
||||
|
||||
_scidacRecord = sr;
|
||||
|
||||
// std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Scidac checksum
|
||||
///////////////////////////////////////////////////////
|
||||
static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb)
|
||||
{
|
||||
uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
|
||||
uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
|
||||
if ( scidac_csuma !=scidac_checksuma) return 0;
|
||||
if ( scidac_csumb !=scidac_checksumb) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lime, ILDG and Scidac I/O classes
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
class GridLimeReader : public BinaryIO {
|
||||
public:
|
||||
///////////////////////////////////////////////////
|
||||
// FIXME: format for RNG? Now just binary out instead
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
FILE *File;
|
||||
LimeReader *LimeR;
|
||||
std::string filename;
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Open the file
|
||||
/////////////////////////////////////////////
|
||||
void open(const std::string &_filename)
|
||||
{
|
||||
filename= _filename;
|
||||
File = fopen(filename.c_str(), "r");
|
||||
if (File == nullptr)
|
||||
{
|
||||
std::cerr << "cannot open file '" << filename << "'" << std::endl;
|
||||
abort();
|
||||
}
|
||||
LimeR = limeCreateReader(File);
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
// Close the file
|
||||
/////////////////////////////////////////////
|
||||
void close(void){
|
||||
fclose(File);
|
||||
// limeDestroyReader(LimeR);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Read a generic lattice field and verify checksum
|
||||
////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
scidacChecksum scidacChecksum_;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
|
||||
std::string format = getFormatString<vobj>();
|
||||
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
uint64_t file_bytes =limeReaderBytes(LimeR);
|
||||
|
||||
// std::cout << GridLogMessage << limeReaderType(LimeR) << " "<< file_bytes <<" bytes "<<std::endl;
|
||||
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
|
||||
|
||||
// std::cout << GridLogMessage<< " readLimeLatticeBinaryObject matches ! " <<std::endl;
|
||||
|
||||
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
|
||||
|
||||
// std::cout << "R sizeof(sobj)= " <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "R Gsites " <<field._grid->_gsites<<std::endl;
|
||||
// std::cout << "R Payload expected " <<PayloadSize<<std::endl;
|
||||
// std::cout << "R file size " <<file_bytes <<std::endl;
|
||||
|
||||
assert(PayloadSize == file_bytes);// Must match or user error
|
||||
|
||||
uint64_t offset= ftello(File);
|
||||
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
|
||||
/////////////////////////////////////////////
|
||||
// Insist checksum is next record
|
||||
/////////////////////////////////////////////
|
||||
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Verify checksums
|
||||
/////////////////////////////////////////////
|
||||
assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
// Read a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
void readLimeObject(std::string &xmlstring,std::string record_name)
|
||||
{
|
||||
// should this be a do while; can we miss a first record??
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
|
||||
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
|
||||
|
||||
// std::cout << GridLogMessage<< " readLimeObject matches ! " << record_name <<std::endl;
|
||||
std::vector<char> xmlc(nbytes+1,'\0');
|
||||
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
|
||||
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
|
||||
|
||||
xmlstring = std::string(&xmlc[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
|
||||
readLimeObject(xmlstring, record_name);
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,object_name,object);
|
||||
}
|
||||
};
|
||||
|
||||
class GridLimeWriter : public BinaryIO
|
||||
{
|
||||
public:
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// FIXME: format for RNG? Now just binary out instead
|
||||
// FIXME: collective calls or not ?
|
||||
// : must know if I am the I/O boss
|
||||
///////////////////////////////////////////////////
|
||||
FILE *File;
|
||||
LimeWriter *LimeW;
|
||||
std::string filename;
|
||||
bool boss_node;
|
||||
GridLimeWriter( bool isboss = true) {
|
||||
boss_node = isboss;
|
||||
}
|
||||
void open(const std::string &_filename) {
|
||||
filename= _filename;
|
||||
if ( boss_node ) {
|
||||
File = fopen(filename.c_str(), "w");
|
||||
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
|
||||
}
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
// Close the file
|
||||
/////////////////////////////////////////////
|
||||
void close(void) {
|
||||
if ( boss_node ) {
|
||||
fclose(File);
|
||||
}
|
||||
// limeDestroyWriter(LimeW);
|
||||
}
|
||||
///////////////////////////////////////////////////////
|
||||
// Lime utility functions
|
||||
///////////////////////////////////////////////////////
|
||||
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
|
||||
{
|
||||
if ( boss_node ) {
|
||||
LimeRecordHeader *h;
|
||||
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
|
||||
assert(limeWriteRecordHeader(h, LimeW) >= 0);
|
||||
limeDestroyHeader(h);
|
||||
}
|
||||
return LIME_SUCCESS;
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
// Write a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
|
||||
{
|
||||
if ( boss_node ) {
|
||||
std::string xmlstring = writer.docString();
|
||||
|
||||
// std::cout << "WriteLimeObject" << record_name <<std::endl;
|
||||
uint64_t nbytes = xmlstring.size();
|
||||
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
|
||||
int err;
|
||||
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
|
||||
assert(h!= NULL);
|
||||
|
||||
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
|
||||
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
limeDestroyHeader(h);
|
||||
}
|
||||
}
|
||||
|
||||
template<class serialisable_object>
|
||||
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
|
||||
{
|
||||
XmlWriter WR("","");
|
||||
|
||||
if (scientificPrec)
|
||||
{
|
||||
WR.scientificFormat(true);
|
||||
WR.setPrecision(scientificPrec);
|
||||
}
|
||||
write(WR,object_name,object);
|
||||
writeLimeObject(MB, ME, WR, object_name, record_name);
|
||||
}
|
||||
////////////////////////////////////////////////////
|
||||
// Write a generic lattice field and csum
|
||||
// This routine is Collectively called by all nodes
|
||||
// in communicator used by the field._grid
|
||||
////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// NB: FILE and iostream are jointly writing disjoint sequences in the
|
||||
// the same file through different file handles (integer units).
|
||||
//
|
||||
// These are both buffered, so why I think this code is right is as follows.
|
||||
//
|
||||
// i) write record header to FILE *File, telegraphing the size; flush
|
||||
// ii) ftello reads the offset from FILE *File .
|
||||
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
|
||||
// Closes iostream and flushes.
|
||||
// iv) fseek on FILE * to end of this disjoint section.
|
||||
// v) Continue writing scidac record.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
GridBase *grid = field._grid;
|
||||
assert(boss_node == field._grid->IsBoss() );
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Create record header
|
||||
////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
int err;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
|
||||
if ( boss_node ) {
|
||||
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
|
||||
fflush(File);
|
||||
}
|
||||
|
||||
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
|
||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Check all nodes agree on file position
|
||||
////////////////////////////////////////////////
|
||||
uint64_t offset1;
|
||||
if ( boss_node ) {
|
||||
offset1 = ftello(File);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
|
||||
|
||||
///////////////////////////////////////////
|
||||
// The above is collective. Write by other means into the binary record
|
||||
///////////////////////////////////////////
|
||||
std::string format = getFormatString<vobj>();
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Wind forward and close the record
|
||||
///////////////////////////////////////////
|
||||
if ( boss_node ) {
|
||||
fseek(File,0,SEEK_END);
|
||||
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
|
||||
assert( (offset2-offset1) == PayloadSize);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Check MPI-2 I/O did what we expect to file
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
if ( boss_node ) {
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
}
|
||||
////////////////////////////////////////
|
||||
// Write checksum element, propagaing forward from the BinaryIO
|
||||
// Always pair a checksum with a binary object, and close message
|
||||
////////////////////////////////////////
|
||||
scidacChecksum checksum;
|
||||
std::stringstream streama; streama << std::hex << scidac_csuma;
|
||||
std::stringstream streamb; streamb << std::hex << scidac_csumb;
|
||||
checksum.suma= streama.str();
|
||||
checksum.sumb= streamb.str();
|
||||
if ( boss_node ) {
|
||||
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class ScidacWriter : public GridLimeWriter {
|
||||
public:
|
||||
|
||||
ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
{
|
||||
scidacFile _scidacFile(grid);
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Write generic lattice field in scidac format
|
||||
////////////////////////////////////////////////
|
||||
template <class vobj, class userRecord>
|
||||
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
|
||||
const unsigned int recordScientificPrec = 0)
|
||||
{
|
||||
GridBase * grid = field._grid;
|
||||
|
||||
////////////////////////////////////////
|
||||
// fill the Grid header
|
||||
////////////////////////////////////////
|
||||
FieldMetaData header;
|
||||
scidacRecord _scidacRecord;
|
||||
scidacFile _scidacFile;
|
||||
|
||||
ScidacMetaData(field,header,_scidacRecord,_scidacFile);
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
|
||||
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
}
|
||||
// Collective call
|
||||
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class ScidacReader : public GridLimeReader {
|
||||
public:
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
{
|
||||
scidacFile _scidacFile(grid);
|
||||
readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Write generic lattice field in scidac format
|
||||
////////////////////////////////////////////////
|
||||
template <class vobj, class userRecord>
|
||||
void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase * grid = field._grid;
|
||||
|
||||
////////////////////////////////////////
|
||||
// fill the Grid header
|
||||
////////////////////////////////////////
|
||||
FieldMetaData header;
|
||||
scidacRecord _scidacRecord;
|
||||
scidacFile _scidacFile;
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
|
||||
readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));
|
||||
}
|
||||
void skipPastBinaryRecord(void) {
|
||||
std::string rec_name(ILDG_BINARY_DATA);
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
|
||||
skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
void skipPastObjectRecord(std::string rec_name) {
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
void skipScidacFieldRecord() {
|
||||
skipPastObjectRecord(std::string(GRID_FORMAT));
|
||||
skipPastObjectRecord(std::string(SCIDAC_RECORD_XML));
|
||||
skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
skipPastBinaryRecord();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class IldgWriter : public ScidacWriter {
|
||||
public:
|
||||
|
||||
IldgWriter(bool isboss) : ScidacWriter(isboss) {};
|
||||
|
||||
///////////////////////////////////
|
||||
// A little helper
|
||||
///////////////////////////////////
|
||||
void writeLimeIldgLFN(std::string &LFN)
|
||||
{
|
||||
uint64_t PayloadSize = LFN.size();
|
||||
int err;
|
||||
createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize);
|
||||
err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0);
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Special ILDG operations ; gauge configs only.
|
||||
// Don't require scidac records EXCEPT checksum
|
||||
// Use Grid MetaData object if present.
|
||||
////////////////////////////////////////////////////////////////
|
||||
template <class vsimd>
|
||||
void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)
|
||||
{
|
||||
GridBase * grid = Umu._grid;
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
typedef iLorentzColourMatrix<vsimd> vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
////////////////////////////////////////
|
||||
// fill the Grid header
|
||||
////////////////////////////////////////
|
||||
FieldMetaData header;
|
||||
scidacRecord _scidacRecord;
|
||||
scidacFile _scidacFile;
|
||||
|
||||
ScidacMetaData(Umu,header,_scidacRecord,_scidacFile);
|
||||
|
||||
std::string format = header.floating_point;
|
||||
header.ensemble_id = description;
|
||||
header.ensemble_label = description;
|
||||
header.sequence_number = sequence;
|
||||
header.ildg_lfn = LFN;
|
||||
|
||||
assert ( (format == std::string("IEEE32BIG"))
|
||||
||(format == std::string("IEEE64BIG")) );
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Fill ILDG header data struct
|
||||
//////////////////////////////////////////////////////
|
||||
ildgFormat ildgfmt ;
|
||||
ildgfmt.field = std::string("su3gauge");
|
||||
|
||||
if ( format == std::string("IEEE32BIG") ) {
|
||||
ildgfmt.precision = 32;
|
||||
} else {
|
||||
ildgfmt.precision = 64;
|
||||
}
|
||||
ildgfmt.version = 1.0;
|
||||
ildgfmt.lx = header.dimension[0];
|
||||
ildgfmt.ly = header.dimension[1];
|
||||
ildgfmt.lz = header.dimension[2];
|
||||
ildgfmt.lt = header.dimension[3];
|
||||
assert(header.nd==4);
|
||||
assert(header.nd==header.dimension.size());
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Fill the USQCD info field
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
usqcdInfo info;
|
||||
info.version=1.0;
|
||||
info.plaq = header.plaquette;
|
||||
info.linktr = header.link_trace;
|
||||
|
||||
std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl;
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
|
||||
writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT)); // rec
|
||||
writeLimeIldgLFN(header.ildg_lfn); // rec
|
||||
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
// limeDestroyWriter(LimeW);
|
||||
}
|
||||
};
|
||||
|
||||
class IldgReader : public GridLimeReader {
|
||||
public:
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Read either Grid/SciDAC/ILDG configuration
|
||||
// Don't require scidac records EXCEPT checksum
|
||||
// Use Grid MetaData object if present.
|
||||
// Else use ILDG MetaData object if present.
|
||||
// Else use SciDAC MetaData object if present.
|
||||
////////////////////////////////////////////////////////////////
|
||||
template <class vsimd>
|
||||
void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) {
|
||||
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
typedef typename GaugeField::vector_object vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
typedef LorentzColourMatrixF fobj;
|
||||
typedef LorentzColourMatrixD dobj;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
std::vector<int> dims = Umu._grid->FullDimensions();
|
||||
|
||||
assert(dims.size()==4);
|
||||
|
||||
// Metadata holders
|
||||
ildgFormat ildgFormat_ ;
|
||||
std::string ildgLFN_ ;
|
||||
scidacChecksum scidacChecksum_;
|
||||
usqcdInfo usqcdInfo_ ;
|
||||
|
||||
// track what we read from file
|
||||
int found_ildgFormat =0;
|
||||
int found_ildgLFN =0;
|
||||
int found_scidacChecksum=0;
|
||||
int found_usqcdInfo =0;
|
||||
int found_ildgBinary =0;
|
||||
int found_FieldMetaData =0;
|
||||
|
||||
uint32_t nersc_csum;
|
||||
uint32_t scidac_csuma;
|
||||
uint32_t scidac_csumb;
|
||||
|
||||
// Binary format
|
||||
std::string format;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Loop over all records
|
||||
// -- Order is poorly guaranteed except ILDG header preceeds binary section.
|
||||
// -- Run like an event loop.
|
||||
// -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing
|
||||
// that Scidac.
|
||||
// -- Insist on Scidac checksum record.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// If not BINARY_DATA read a string and parse
|
||||
//////////////////////////////////////////////////////////////////
|
||||
if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) ) ) {
|
||||
|
||||
// Copy out the string
|
||||
std::vector<char> xmlc(nbytes+1,'\0');
|
||||
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
|
||||
// std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// ILDG format record
|
||||
|
||||
std::string xmlstring(&xmlc[0]);
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"ildgFormat",ildgFormat_);
|
||||
|
||||
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
|
||||
if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG");
|
||||
|
||||
assert( ildgFormat_.lx == dims[0]);
|
||||
assert( ildgFormat_.ly == dims[1]);
|
||||
assert( ildgFormat_.lz == dims[2]);
|
||||
assert( ildgFormat_.lt == dims[3]);
|
||||
|
||||
found_ildgFormat = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
|
||||
FieldMetaData_.ildg_lfn = xmlstring;
|
||||
found_ildgLFN = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"FieldMetaData",FieldMetaData_);
|
||||
|
||||
format = FieldMetaData_.floating_point;
|
||||
|
||||
assert(FieldMetaData_.dimension[0] == dims[0]);
|
||||
assert(FieldMetaData_.dimension[1] == dims[1]);
|
||||
assert(FieldMetaData_.dimension[2] == dims[2]);
|
||||
assert(FieldMetaData_.dimension[3] == dims[3]);
|
||||
|
||||
found_FieldMetaData = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
|
||||
// is it a USQCD info field
|
||||
if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"usqcdInfo",usqcdInfo_);
|
||||
found_usqcdInfo = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"scidacChecksum",scidacChecksum_);
|
||||
found_scidacChecksum = 1;
|
||||
}
|
||||
|
||||
} else {
|
||||
/////////////////////////////////
|
||||
// Binary data
|
||||
/////////////////////////////////
|
||||
std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
|
||||
uint64_t offset= ftello(File);
|
||||
if ( format == std::string("IEEE64BIG") ) {
|
||||
GaugeSimpleMunger<dobj, sobj> munge;
|
||||
BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
} else {
|
||||
GaugeSimpleMunger<fobj, sobj> munge;
|
||||
BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
|
||||
found_ildgBinary = 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Minimally must find binary segment and checksum
|
||||
// Since this is an ILDG reader require ILDG format
|
||||
//////////////////////////////////////////////////////
|
||||
assert(found_ildgBinary);
|
||||
assert(found_ildgFormat);
|
||||
assert(found_scidacChecksum);
|
||||
|
||||
// Must find something with the lattice dimensions
|
||||
assert(found_FieldMetaData||found_ildgFormat);
|
||||
|
||||
if ( found_FieldMetaData ) {
|
||||
|
||||
std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl;
|
||||
|
||||
} else {
|
||||
|
||||
assert(found_ildgFormat);
|
||||
assert ( ildgFormat_.field == std::string("su3gauge") );
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Populate our Grid metadata as best we can
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::ostringstream vers; vers << ildgFormat_.version;
|
||||
FieldMetaData_.hdr_version = vers.str();
|
||||
FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3");
|
||||
|
||||
FieldMetaData_.nd=4;
|
||||
FieldMetaData_.dimension.resize(4);
|
||||
|
||||
FieldMetaData_.dimension[0] = ildgFormat_.lx ;
|
||||
FieldMetaData_.dimension[1] = ildgFormat_.ly ;
|
||||
FieldMetaData_.dimension[2] = ildgFormat_.lz ;
|
||||
FieldMetaData_.dimension[3] = ildgFormat_.lt ;
|
||||
|
||||
if ( found_usqcdInfo ) {
|
||||
FieldMetaData_.plaquette = usqcdInfo_.plaq;
|
||||
FieldMetaData_.link_trace= usqcdInfo_.linktr;
|
||||
std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl;
|
||||
std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl;
|
||||
std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl;
|
||||
} else {
|
||||
FieldMetaData_.plaquette = 0.0;
|
||||
FieldMetaData_.link_trace= 0.0;
|
||||
std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Really really want to mandate a scidac checksum
|
||||
////////////////////////////////////////////////////////////
|
||||
if ( found_scidacChecksum ) {
|
||||
FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
|
||||
FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
|
||||
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
|
||||
assert( scidac_csuma ==FieldMetaData_.scidac_checksuma);
|
||||
assert( scidac_csumb ==FieldMetaData_.scidac_checksumb);
|
||||
std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl;
|
||||
assert(0); // Can I insist always checksum ?
|
||||
}
|
||||
|
||||
if ( found_FieldMetaData || found_usqcdInfo ) {
|
||||
FieldMetaData checker;
|
||||
GaugeStatistics(Umu,checker);
|
||||
assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5);
|
||||
assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5);
|
||||
std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}}
|
||||
|
||||
//HAVE_LIME
|
||||
#endif
|
||||
|
||||
#endif
|
@ -1,237 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/IldgIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ILDGTYPES_IO_H
|
||||
#define GRID_ILDGTYPES_IO_H
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
extern "C" { // for linkage
|
||||
#include "lime.h"
|
||||
}
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Data representation of records that enter ILDG and SciDac formats
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define GRID_FORMAT "grid-format"
|
||||
#define ILDG_FORMAT "ildg-format"
|
||||
#define ILDG_BINARY_DATA "ildg-binary-data"
|
||||
#define ILDG_DATA_LFN "ildg-data-lfn"
|
||||
#define SCIDAC_CHECKSUM "scidac-checksum"
|
||||
#define SCIDAC_PRIVATE_FILE_XML "scidac-private-file-xml"
|
||||
#define SCIDAC_FILE_XML "scidac-file-xml"
|
||||
#define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml"
|
||||
#define SCIDAC_RECORD_XML "scidac-record-xml"
|
||||
#define SCIDAC_BINARY_DATA "scidac-binary-data"
|
||||
// Unused SCIDAC records names; could move to support this functionality
|
||||
#define SCIDAC_SITELIST "scidac-sitelist"
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat
|
||||
const int GRID_IO_MULTIFILE = 1; // hardcode lift from QIO compat
|
||||
const int GRID_IO_FIELD = 0; // hardcode lift from QIO compat
|
||||
const int GRID_IO_GLOBAL = 1; // hardcode lift from QIO compat
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// QIO uses mandatory "private" records fixed format
|
||||
// Private is in principle "opaque" however it can't be changed now because that would break existing
|
||||
// file compatability, so should be correct to assume the undocumented but defacto file structure.
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct emptyUserRecord : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy);
|
||||
emptyUserRecord() { dummy=0; };
|
||||
};
|
||||
|
||||
////////////////////////
|
||||
// Scidac private file xml
|
||||
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
|
||||
////////////////////////
|
||||
struct scidacFile : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile,
|
||||
double, version,
|
||||
int, spacetime,
|
||||
std::string, dims, // must convert to int
|
||||
int, volfmt);
|
||||
|
||||
std::vector<int> getDimensions(void) {
|
||||
std::stringstream stream(dims);
|
||||
std::vector<int> dimensions;
|
||||
int n;
|
||||
while(stream >> n){
|
||||
dimensions.push_back(n);
|
||||
}
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
void setDimensions(std::vector<int> dimensions) {
|
||||
char delimiter = ' ';
|
||||
std::stringstream stream;
|
||||
for(int i=0;i<dimensions.size();i++){
|
||||
stream << dimensions[i];
|
||||
if ( i != dimensions.size()-1) {
|
||||
stream << delimiter <<std::endl;
|
||||
}
|
||||
}
|
||||
dims = stream.str();
|
||||
}
|
||||
|
||||
// Constructor provides Grid
|
||||
scidacFile() =default; // default constructor
|
||||
scidacFile(GridBase * grid){
|
||||
version = 1.0;
|
||||
spacetime = grid->_ndimension;
|
||||
setDimensions(grid->FullDimensions());
|
||||
volfmt = GRID_IO_SINGLEFILE;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// scidac-private-record-xml : example
|
||||
// <scidacRecord>
|
||||
// <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype>
|
||||
// <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins>
|
||||
// <typesize>144</typesize><datacount>4</datacount>
|
||||
// </scidacRecord>
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct scidacRecord : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord,
|
||||
double, version,
|
||||
std::string, date,
|
||||
int, recordtype,
|
||||
std::string, datatype,
|
||||
std::string, precision,
|
||||
int, colors,
|
||||
int, spins,
|
||||
int, typesize,
|
||||
int, datacount);
|
||||
|
||||
scidacRecord()
|
||||
: version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
|
||||
{}
|
||||
};
|
||||
|
||||
////////////////////////
|
||||
// ILDG format
|
||||
////////////////////////
|
||||
struct ildgFormat : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat,
|
||||
double, version,
|
||||
std::string, field,
|
||||
int, precision,
|
||||
int, lx,
|
||||
int, ly,
|
||||
int, lz,
|
||||
int, lt);
|
||||
ildgFormat() { version=1.0; };
|
||||
};
|
||||
////////////////////////
|
||||
// USQCD info
|
||||
////////////////////////
|
||||
struct usqcdInfo : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo,
|
||||
double, version,
|
||||
double, plaq,
|
||||
double, linktr,
|
||||
std::string, info);
|
||||
usqcdInfo() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
////////////////////////
|
||||
// Scidac Checksum
|
||||
////////////////////////
|
||||
struct scidacChecksum : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum,
|
||||
double, version,
|
||||
std::string, suma,
|
||||
std::string, sumb);
|
||||
scidacChecksum() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Type: scidac-file-xml <title>MILC ILDG archival gauge configuration</title>
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Type:
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////
|
||||
// Scidac private file xml
|
||||
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
|
||||
////////////////////////
|
||||
|
||||
#if 0
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct usqcdPropFile : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile,
|
||||
double, version,
|
||||
std::string, type,
|
||||
std::string, info);
|
||||
usqcdPropFile() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
struct usqcdSourceInfo : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo,
|
||||
double, version,
|
||||
std::string, info);
|
||||
usqcdSourceInfo() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
struct usqcdPropInfo : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo,
|
||||
double, version,
|
||||
int, spin,
|
||||
int, color,
|
||||
std::string, info);
|
||||
usqcdPropInfo() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
@ -1,327 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/NerscIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <unistd.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <pwd.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Precision mapping
|
||||
///////////////////////////////////////////////////////
|
||||
template<class vobj> static std::string getFormatString (void)
|
||||
{
|
||||
std::string format;
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
if ( sizeof(stype) == sizeof(float) ) {
|
||||
format = std::string("IEEE32BIG");
|
||||
}
|
||||
if ( sizeof(stype) == sizeof(double) ) {
|
||||
format = std::string("IEEE64BIG");
|
||||
}
|
||||
return format;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// header specification/interpretation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class FieldMetaData : Serializable {
|
||||
public:
|
||||
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData,
|
||||
int, nd,
|
||||
std::vector<int>, dimension,
|
||||
std::vector<std::string>, boundary,
|
||||
int, data_start,
|
||||
std::string, hdr_version,
|
||||
std::string, storage_format,
|
||||
double, link_trace,
|
||||
double, plaquette,
|
||||
uint32_t, checksum,
|
||||
uint32_t, scidac_checksuma,
|
||||
uint32_t, scidac_checksumb,
|
||||
unsigned int, sequence_number,
|
||||
std::string, data_type,
|
||||
std::string, ensemble_id,
|
||||
std::string, ensemble_label,
|
||||
std::string, ildg_lfn,
|
||||
std::string, creator,
|
||||
std::string, creator_hardware,
|
||||
std::string, creation_date,
|
||||
std::string, archive_date,
|
||||
std::string, floating_point);
|
||||
// WARNING: non-initialised values might lead to twisted parallel IO
|
||||
// issues, std::string are fine because they initliase to size 0
|
||||
// as per C++ standard.
|
||||
FieldMetaData(void)
|
||||
: nd(4), dimension(4,0), boundary(4, ""), data_start(0),
|
||||
link_trace(0.), plaquette(0.), checksum(0),
|
||||
scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
|
||||
{}
|
||||
};
|
||||
|
||||
namespace QCD {
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Bit and Physical Checksumming and QA of data
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
inline void GridMetaData(GridBase *grid,FieldMetaData &header)
|
||||
{
|
||||
int nd = grid->_ndimension;
|
||||
header.nd = nd;
|
||||
header.dimension.resize(nd);
|
||||
header.boundary.resize(nd);
|
||||
header.data_start = 0;
|
||||
for(int d=0;d<nd;d++) {
|
||||
header.dimension[d] = grid->_fdimensions[d];
|
||||
}
|
||||
for(int d=0;d<nd;d++) {
|
||||
header.boundary[d] = std::string("PERIODIC");
|
||||
}
|
||||
}
|
||||
|
||||
inline void MachineCharacteristics(FieldMetaData &header)
|
||||
{
|
||||
// Who
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
if (pw) header.creator = std::string(pw->pw_name);
|
||||
|
||||
// When
|
||||
std::time_t t = std::time(nullptr);
|
||||
std::tm tm_ = *std::localtime(&t);
|
||||
std::ostringstream oss;
|
||||
// oss << std::put_time(&tm_, "%c %Z");
|
||||
header.creation_date = oss.str();
|
||||
header.archive_date = header.creation_date;
|
||||
|
||||
// What
|
||||
struct utsname name; uname(&name);
|
||||
header.creator_hardware = std::string(name.nodename)+"-";
|
||||
header.creator_hardware+= std::string(name.machine)+"-";
|
||||
header.creator_hardware+= std::string(name.sysname)+"-";
|
||||
header.creator_hardware+= std::string(name.release);
|
||||
}
|
||||
|
||||
#define dump_meta_data(field, s) \
|
||||
s << "BEGIN_HEADER" << std::endl; \
|
||||
s << "HDR_VERSION = " << field.hdr_version << std::endl; \
|
||||
s << "DATATYPE = " << field.data_type << std::endl; \
|
||||
s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \
|
||||
for(int i=0;i<4;i++){ \
|
||||
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
|
||||
} \
|
||||
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
|
||||
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \
|
||||
for(int i=0;i<4;i++){ \
|
||||
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl; \
|
||||
} \
|
||||
\
|
||||
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
|
||||
s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \
|
||||
s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \
|
||||
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl; \
|
||||
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl; \
|
||||
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl; \
|
||||
s << "CREATOR = " << field.creator << std::endl; \
|
||||
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl; \
|
||||
s << "CREATION_DATE = " << field.creation_date << std::endl; \
|
||||
s << "ARCHIVE_DATE = " << field.archive_date << std::endl; \
|
||||
s << "FLOATING_POINT = " << field.floating_point << std::endl; \
|
||||
s << "END_HEADER" << std::endl;
|
||||
|
||||
template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header)
|
||||
{
|
||||
GridBase *grid = field._grid;
|
||||
std::string format = getFormatString<vobj>();
|
||||
header.floating_point = format;
|
||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||
GridMetaData(grid,header);
|
||||
MachineCharacteristics(header);
|
||||
}
|
||||
inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header)
|
||||
{
|
||||
// How to convert data precision etc...
|
||||
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplF>::linkTrace(data);
|
||||
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplF>::avgPlaquette(data);
|
||||
}
|
||||
inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
|
||||
{
|
||||
// How to convert data precision etc...
|
||||
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplD>::linkTrace(data);
|
||||
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplD>::avgPlaquette(data);
|
||||
}
|
||||
template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header)
|
||||
{
|
||||
|
||||
GridBase *grid = field._grid;
|
||||
std::string format = getFormatString<vLorentzColourMatrixF>();
|
||||
header.floating_point = format;
|
||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||
GridMetaData(grid,header);
|
||||
GaugeStatistics(field,header);
|
||||
MachineCharacteristics(header);
|
||||
}
|
||||
template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
|
||||
{
|
||||
GridBase *grid = field._grid;
|
||||
std::string format = getFormatString<vLorentzColourMatrixD>();
|
||||
header.floating_point = format;
|
||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||
GridMetaData(grid,header);
|
||||
GaugeStatistics(field,header);
|
||||
MachineCharacteristics(header);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Utilities ; these are QCD aware
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
inline void reconstruct3(LorentzColourMatrix & cm)
|
||||
{
|
||||
const int x=0;
|
||||
const int y=1;
|
||||
const int z=2;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
|
||||
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
|
||||
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Some data types for intermediate storage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >;
|
||||
|
||||
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
||||
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
||||
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Simple classes for precision conversion
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
template <class fobj, class sobj>
|
||||
struct BinarySimpleUnmunger {
|
||||
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
|
||||
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
|
||||
|
||||
void operator()(sobj &in, fobj &out) {
|
||||
// take word by word and transform accoding to the status
|
||||
fobj_stype *out_buffer = (fobj_stype *)&out;
|
||||
sobj_stype *in_buffer = (sobj_stype *)∈
|
||||
size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
|
||||
size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
|
||||
assert(fobj_words == sobj_words);
|
||||
|
||||
for (unsigned int word = 0; word < sobj_words; word++)
|
||||
out_buffer[word] = in_buffer[word]; // type conversion on the fly
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template <class fobj, class sobj>
|
||||
struct BinarySimpleMunger {
|
||||
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
|
||||
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
|
||||
|
||||
void operator()(fobj &in, sobj &out) {
|
||||
// take word by word and transform accoding to the status
|
||||
fobj_stype *in_buffer = (fobj_stype *)∈
|
||||
sobj_stype *out_buffer = (sobj_stype *)&out;
|
||||
size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
|
||||
size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
|
||||
assert(fobj_words == sobj_words);
|
||||
|
||||
for (unsigned int word = 0; word < sobj_words; word++)
|
||||
out_buffer[word] = in_buffer[word]; // type conversion on the fly
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct GaugeSimpleMunger{
|
||||
void operator()(fobj &in, sobj &out) {
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template <class fobj, class sobj>
|
||||
struct GaugeSimpleUnmunger {
|
||||
|
||||
void operator()(sobj &in, fobj &out) {
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Gauge3x2munger{
|
||||
void operator() (fobj &in,sobj &out){
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int i=0;i<2;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)()(i,j) = in(mu)(i)(j);
|
||||
}}
|
||||
}
|
||||
reconstruct3(out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Gauge3x2unmunger{
|
||||
void operator() (sobj &in,fobj &out){
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int i=0;i<2;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)(i)(j) = in(mu)()(i,j);
|
||||
}}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,363 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/NerscIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Matt Spraggs <matthew.spraggs@gmail.com>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_NERSC_IO_H
|
||||
#define GRID_NERSC_IO_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Write and read from fstream; comput header offset for payload
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class NerscIO : public BinaryIO {
|
||||
public:
|
||||
|
||||
static inline void truncate(std::string file){
|
||||
std::ofstream fout(file,std::ios::out);
|
||||
}
|
||||
|
||||
static inline unsigned int writeHeader(FieldMetaData &field,std::string file)
|
||||
{
|
||||
std::ofstream fout(file,std::ios::out|std::ios::in);
|
||||
fout.seekp(0,std::ios::beg);
|
||||
dump_meta_data(field, fout);
|
||||
field.data_start = fout.tellp();
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
// for the header-reader
|
||||
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
|
||||
{
|
||||
uint64_t offset=0;
|
||||
std::map<std::string,std::string> header;
|
||||
std::string line;
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// read the header
|
||||
//////////////////////////////////////////////////
|
||||
std::ifstream fin(file);
|
||||
|
||||
getline(fin,line); // read one line and insist is
|
||||
|
||||
removeWhitespace(line);
|
||||
std::cout << GridLogMessage << "* " << line << std::endl;
|
||||
|
||||
assert(line==std::string("BEGIN_HEADER"));
|
||||
|
||||
do {
|
||||
getline(fin,line); // read one line
|
||||
std::cout << GridLogMessage << "* "<<line<< std::endl;
|
||||
int eq = line.find("=");
|
||||
if(eq >0) {
|
||||
std::string key=line.substr(0,eq);
|
||||
std::string val=line.substr(eq+1);
|
||||
removeWhitespace(key);
|
||||
removeWhitespace(val);
|
||||
|
||||
header[key] = val;
|
||||
}
|
||||
} while( line.find("END_HEADER") == std::string::npos );
|
||||
|
||||
field.data_start = fin.tellg();
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// chomp the values
|
||||
//////////////////////////////////////////////////
|
||||
field.hdr_version = header["HDR_VERSION"];
|
||||
field.data_type = header["DATATYPE"];
|
||||
field.storage_format = header["STORAGE_FORMAT"];
|
||||
|
||||
field.dimension[0] = std::stol(header["DIMENSION_1"]);
|
||||
field.dimension[1] = std::stol(header["DIMENSION_2"]);
|
||||
field.dimension[2] = std::stol(header["DIMENSION_3"]);
|
||||
field.dimension[3] = std::stol(header["DIMENSION_4"]);
|
||||
|
||||
assert(grid->_ndimension == 4);
|
||||
for(int d=0;d<4;d++){
|
||||
assert(grid->_fdimensions[d]==field.dimension[d]);
|
||||
}
|
||||
|
||||
field.link_trace = std::stod(header["LINK_TRACE"]);
|
||||
field.plaquette = std::stod(header["PLAQUETTE"]);
|
||||
|
||||
field.boundary[0] = header["BOUNDARY_1"];
|
||||
field.boundary[1] = header["BOUNDARY_2"];
|
||||
field.boundary[2] = header["BOUNDARY_3"];
|
||||
field.boundary[3] = header["BOUNDARY_4"];
|
||||
|
||||
field.checksum = std::stoul(header["CHECKSUM"],0,16);
|
||||
field.ensemble_id = header["ENSEMBLE_ID"];
|
||||
field.ensemble_label = header["ENSEMBLE_LABEL"];
|
||||
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
|
||||
field.creator = header["CREATOR"];
|
||||
field.creator_hardware = header["CREATOR_HARDWARE"];
|
||||
field.creation_date = header["CREATION_DATE"];
|
||||
field.archive_date = header["ARCHIVE_DATE"];
|
||||
field.floating_point = header["FLOATING_POINT"];
|
||||
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Now the meat: the object readers
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vsimd>
|
||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
|
||||
FieldMetaData& header,
|
||||
std::string file)
|
||||
{
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
uint64_t offset = readHeader(file,Umu._grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
std::string format(header.floating_point);
|
||||
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
// depending on datatype, set up munger;
|
||||
// munger is a function of <floating point, Real, data_type>
|
||||
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
||||
if ( ieee32 || ieee32big ) {
|
||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
||||
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
if ( ieee64 || ieee64big ) {
|
||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
||||
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
|
||||
if ( ieee32 || ieee32big ) {
|
||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
||||
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
if ( ieee64 || ieee64big ) {
|
||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
||||
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
GaugeStatistics(Umu,clone);
|
||||
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
|
||||
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
|
||||
<<" header "<<header.plaquette<<std::endl;
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
|
||||
<<" header "<<header.link_trace<<std::endl;
|
||||
|
||||
if ( fabs(clone.plaquette -header.plaquette ) >= 1.0e-5 ) {
|
||||
std::cout << " Plaquette mismatch "<<std::endl;
|
||||
std::cout << Umu[0]<<std::endl;
|
||||
std::cout << Umu[1]<<std::endl;
|
||||
}
|
||||
if ( nersc_csum != header.checksum ) {
|
||||
std::cerr << " checksum mismatch " << std::endl;
|
||||
std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl;
|
||||
std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl;
|
||||
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
|
||||
exit(0);
|
||||
}
|
||||
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
|
||||
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
|
||||
assert(nersc_csum == header.checksum );
|
||||
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
||||
}
|
||||
|
||||
template<class vsimd>
|
||||
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
|
||||
std::string file,
|
||||
int two_row,
|
||||
int bits32)
|
||||
{
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
typedef iLorentzColourMatrix<vsimd> vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
FieldMetaData header;
|
||||
///////////////////////////////////////////
|
||||
// Following should become arguments
|
||||
///////////////////////////////////////////
|
||||
header.sequence_number = 1;
|
||||
header.ensemble_id = "UKQCD";
|
||||
header.ensemble_label = "DWF";
|
||||
|
||||
typedef LorentzColourMatrixD fobj3D;
|
||||
typedef LorentzColour2x3D fobj2D;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
GridMetaData(grid,header);
|
||||
assert(header.nd==4);
|
||||
GaugeStatistics(Umu,header);
|
||||
MachineCharacteristics(header);
|
||||
|
||||
uint64_t offset;
|
||||
|
||||
// Sod it -- always write 3x3 double
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
header.data_type = std::string("4D_SU3_GAUGE_3x3");
|
||||
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
<<std::dec<<" plaq "<< header.plaquette <<std::endl;
|
||||
|
||||
}
|
||||
///////////////////////////////
|
||||
// RNG state
|
||||
///////////////////////////////
|
||||
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file)
|
||||
{
|
||||
typedef typename GridParallelRNG::RngStateType RngStateType;
|
||||
|
||||
// Following should become arguments
|
||||
FieldMetaData header;
|
||||
header.sequence_number = 1;
|
||||
header.ensemble_id = "UKQCD";
|
||||
header.ensemble_label = "DWF";
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
|
||||
GridMetaData(grid,header);
|
||||
assert(header.nd==4);
|
||||
header.link_trace=0.0;
|
||||
header.plaquette=0.0;
|
||||
MachineCharacteristics(header);
|
||||
|
||||
uint64_t offset;
|
||||
|
||||
#ifdef RNG_RANLUX
|
||||
header.floating_point = std::string("UINT64");
|
||||
header.data_type = std::string("RANLUX48");
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
header.floating_point = std::string("UINT32");
|
||||
header.data_type = std::string("MT19937");
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
header.floating_point = std::string("UINT64");
|
||||
header.data_type = std::string("SITMO");
|
||||
#endif
|
||||
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage
|
||||
<<"Written NERSC RNG STATE "<<file<< " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
<<std::dec<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file)
|
||||
{
|
||||
typedef typename GridParallelRNG::RngStateType RngStateType;
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
|
||||
uint64_t offset = readHeader(file,grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
std::string format(header.floating_point);
|
||||
std::string data_type(header.data_type);
|
||||
|
||||
#ifdef RNG_RANLUX
|
||||
assert(format == std::string("UINT64"));
|
||||
assert(data_type == std::string("RANLUX48"));
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
assert(format == std::string("UINT32"));
|
||||
assert(data_type == std::string("MT19937"));
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
assert(format == std::string("UINT64"));
|
||||
assert(data_type == std::string("SITMO"));
|
||||
#endif
|
||||
|
||||
// depending on datatype, set up munger;
|
||||
// munger is a function of <floating point, Real, data_type>
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
if ( nersc_csum != header.checksum ) {
|
||||
std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl;
|
||||
exit(0);
|
||||
}
|
||||
assert(nersc_csum == header.checksum );
|
||||
|
||||
std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}}
|
||||
#endif
|
@ -1,124 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/QCD.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LT_H
|
||||
#define GRID_LT_H
|
||||
namespace Grid{
|
||||
|
||||
// First steps in the complete generalization of the Physics part
|
||||
// Design not final
|
||||
namespace LatticeTheories {
|
||||
|
||||
template <int Dimensions>
|
||||
struct LatticeTheory {
|
||||
static const int Nd = Dimensions;
|
||||
static const int Nds = Dimensions * 2; // double stored field
|
||||
template <typename vtype>
|
||||
using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
||||
};
|
||||
|
||||
template <int Dimensions, int Colours>
|
||||
struct LatticeGaugeTheory : public LatticeTheory<Dimensions> {
|
||||
static const int Nds = Dimensions * 2;
|
||||
static const int Nd = Dimensions;
|
||||
static const int Nc = Colours;
|
||||
|
||||
template <typename vtype>
|
||||
using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > >;
|
||||
template <typename vtype>
|
||||
using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd>;
|
||||
template <typename vtype>
|
||||
using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>;
|
||||
template <typename vtype>
|
||||
using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
|
||||
};
|
||||
|
||||
template <int Dimensions, int Colours, int Spin>
|
||||
struct FermionicLatticeGaugeTheory
|
||||
: public LatticeGaugeTheory<Dimensions, Colours> {
|
||||
static const int Nd = Dimensions;
|
||||
static const int Nds = Dimensions * 2;
|
||||
static const int Nc = Colours;
|
||||
static const int Ns = Spin;
|
||||
|
||||
template <typename vtype>
|
||||
using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
||||
template <typename vtype>
|
||||
using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
|
||||
template <typename vtype>
|
||||
using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
|
||||
template <typename vtype>
|
||||
using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
||||
// These 2 only if Spin is a multiple of 2
|
||||
static const int Nhs = Spin / 2;
|
||||
template <typename vtype>
|
||||
using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||
template <typename vtype>
|
||||
using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||
|
||||
//tests
|
||||
typedef iColourMatrix<Complex> ColourMatrix;
|
||||
typedef iColourMatrix<ComplexF> ColourMatrixF;
|
||||
typedef iColourMatrix<ComplexD> ColourMatrixD;
|
||||
|
||||
|
||||
};
|
||||
|
||||
// Examples, not complete now.
|
||||
struct QCD : public FermionicLatticeGaugeTheory<4, 3, 4> {
|
||||
static const int Xp = 0;
|
||||
static const int Yp = 1;
|
||||
static const int Zp = 2;
|
||||
static const int Tp = 3;
|
||||
static const int Xm = 4;
|
||||
static const int Ym = 5;
|
||||
static const int Zm = 6;
|
||||
static const int Tm = 7;
|
||||
|
||||
typedef FermionicLatticeGaugeTheory FLGT;
|
||||
|
||||
typedef FLGT::iSpinMatrix<Complex > SpinMatrix;
|
||||
typedef FLGT::iSpinMatrix<ComplexF > SpinMatrixF;
|
||||
typedef FLGT::iSpinMatrix<ComplexD > SpinMatrixD;
|
||||
|
||||
};
|
||||
struct QED : public FermionicLatticeGaugeTheory<4, 1, 4> {//fill
|
||||
};
|
||||
|
||||
template <int Dimensions>
|
||||
struct Scalar : public LatticeTheory<Dimensions> {};
|
||||
|
||||
}; // LatticeTheories
|
||||
|
||||
} // Grid
|
||||
|
||||
#endif
|
@ -1,56 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/ActionBase.h
|
||||
|
||||
Copyright (C) 2015-2016
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef ACTION_BASE_H
|
||||
#define ACTION_BASE_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template <class GaugeField >
|
||||
class Action
|
||||
{
|
||||
|
||||
public:
|
||||
bool is_smeared = false;
|
||||
// Heatbath?
|
||||
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
|
||||
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
|
||||
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
|
||||
virtual std::string action_name() = 0; // return the action name
|
||||
virtual std::string LogParameters() = 0; // prints action parameters
|
||||
virtual ~Action(){}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // ACTION_BASE_H
|
@ -1,92 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/ActionParams.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef GRID_QCD_ACTION_PARAMS_H
|
||||
#define GRID_QCD_ACTION_PARAMS_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// These can move into a params header and be given MacroMagic serialisation
|
||||
struct GparityWilsonImplParams {
|
||||
bool overlapCommsCompute;
|
||||
std::vector<int> twists;
|
||||
GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){};
|
||||
};
|
||||
|
||||
struct WilsonImplParams {
|
||||
bool overlapCommsCompute;
|
||||
std::vector<Complex> boundary_phases;
|
||||
WilsonImplParams() : overlapCommsCompute(false) {
|
||||
boundary_phases.resize(Nd, 1.0);
|
||||
};
|
||||
WilsonImplParams(const std::vector<Complex> phi)
|
||||
: boundary_phases(phi), overlapCommsCompute(false) {}
|
||||
};
|
||||
|
||||
struct StaggeredImplParams {
|
||||
StaggeredImplParams() {};
|
||||
};
|
||||
|
||||
struct OneFlavourRationalParams : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,
|
||||
RealD, lo,
|
||||
RealD, hi,
|
||||
int, MaxIter,
|
||||
RealD, tolerance,
|
||||
int, degree,
|
||||
int, precision);
|
||||
|
||||
// MaxIter and tolerance, vectors??
|
||||
|
||||
// constructor
|
||||
OneFlavourRationalParams( RealD _lo = 0.0,
|
||||
RealD _hi = 1.0,
|
||||
int _maxit = 1000,
|
||||
RealD tol = 1.0e-8,
|
||||
int _degree = 10,
|
||||
int _precision = 64)
|
||||
: lo(_lo),
|
||||
hi(_hi),
|
||||
MaxIter(_maxit),
|
||||
tolerance(tol),
|
||||
degree(_degree),
|
||||
precision(_precision){};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
@ -1,100 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/AbstractEOFAFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_ABSTRACT_EOFA_FERMION_H
|
||||
#define GRID_QCD_ABSTRACT_EOFA_FERMION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// DJM: Abstract base class for EOFA fermion types.
|
||||
// Defines layout of additional EOFA-specific parameters and operators.
|
||||
// Use to construct EOFA pseudofermion actions that are agnostic to
|
||||
// Shamir / Mobius / etc., and ensure that no one can construct EOFA
|
||||
// pseudofermion action with non-EOFA fermion type.
|
||||
template<class Impl>
|
||||
class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
public:
|
||||
// Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
|
||||
RealD mq1;
|
||||
RealD mq2;
|
||||
RealD mq3;
|
||||
RealD shift;
|
||||
int pm;
|
||||
|
||||
RealD alpha; // Mobius scale
|
||||
RealD k; // EOFA normalization constant
|
||||
|
||||
virtual void Instantiatable(void) = 0;
|
||||
|
||||
// EOFA-specific operations
|
||||
// Force user to implement in derived classes
|
||||
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0;
|
||||
virtual void Dtilde (const FermionField& in, FermionField& out) = 0;
|
||||
virtual void DtildeInv(const FermionField& in, FermionField& out) = 0;
|
||||
|
||||
// Implement derivatives in base class:
|
||||
// for EOFA both DWF and Mobius just need d(Dw)/dU
|
||||
virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDeriv(mat, U, V, dag);
|
||||
};
|
||||
virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDerivOE(mat, U, V, dag);
|
||||
};
|
||||
virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
|
||||
this->DhopDerivEO(mat, U, V, dag);
|
||||
};
|
||||
|
||||
// Recompute 5D coefficients for different value of shift constant
|
||||
// (needed for heatbath loop over poles)
|
||||
virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
|
||||
|
||||
// Constructors
|
||||
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
|
||||
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
|
||||
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
|
||||
: CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
|
||||
_mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
this->alpha = _b + _c;
|
||||
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
|
||||
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
|
||||
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
|
||||
};
|
||||
};
|
||||
}}
|
||||
|
||||
#endif
|
@ -1,438 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
|
||||
GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3,
|
||||
RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
|
||||
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
|
||||
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
|
||||
_shift, _pm, _M5, 1.0, 0.0, p)
|
||||
{
|
||||
RealD eps = 1.0;
|
||||
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
|
||||
assert(zdata->n == this->Ls);
|
||||
|
||||
std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
|
||||
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
|
||||
|
||||
Approx::zolotarev_free(zdata);
|
||||
}
|
||||
|
||||
/***************************************************************
|
||||
* Additional EOFA operators only called outside the inverter.
|
||||
* Since speed is not essential, simple axpby-style
|
||||
* implementations should be fine.
|
||||
***************************************************************/
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
Din = zero;
|
||||
if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
|
||||
else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
|
||||
else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
|
||||
else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
|
||||
}
|
||||
|
||||
// This is just the identity for DWF
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
|
||||
|
||||
// This is just the identity for DWF
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
|
||||
|
||||
/*****************************************************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField Din(psi._grid);
|
||||
|
||||
this->Meooe5D(psi, Din);
|
||||
this->DW(Din, chi, DaggerNo);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
this->M5D(psi, chi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField Din(psi._grid);
|
||||
|
||||
this->DW(psi, Din, DaggerYes);
|
||||
this->MeooeDag5D(Din, chi);
|
||||
this->M5Ddag(psi, chi);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
* Performance critical fermion operators called inside the inverter
|
||||
********************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int pm = this->pm;
|
||||
RealD shift = this->shift;
|
||||
RealD mq1 = this->mq1;
|
||||
RealD mq2 = this->mq2;
|
||||
RealD mq3 = this->mq3;
|
||||
|
||||
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
|
||||
Coeff_t shiftp(0.0), shiftm(0.0);
|
||||
if(shift != 0.0){
|
||||
if(pm == 1){ shiftp = shift*(mq3-mq2); }
|
||||
else{ shiftm = -shift*(mq3-mq2); }
|
||||
}
|
||||
|
||||
std::vector<Coeff_t> diag(Ls,1.0);
|
||||
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
|
||||
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftp;
|
||||
|
||||
#if(0)
|
||||
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
|
||||
for(int i=0; i<diag.size(); ++i){
|
||||
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
|
||||
}
|
||||
for(int i=0; i<upper.size(); ++i){
|
||||
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
|
||||
}
|
||||
for(int i=0; i<lower.size(); ++i){
|
||||
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
this->M5D(psi, chi, chi, lower, diag, upper);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int pm = this->pm;
|
||||
RealD shift = this->shift;
|
||||
RealD mq1 = this->mq1;
|
||||
RealD mq2 = this->mq2;
|
||||
RealD mq3 = this->mq3;
|
||||
|
||||
// coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
|
||||
Coeff_t shiftp(0.0), shiftm(0.0);
|
||||
if(shift != 0.0){
|
||||
if(pm == 1){ shiftp = shift*(mq3-mq2); }
|
||||
else{ shiftm = -shift*(mq3-mq2); }
|
||||
}
|
||||
|
||||
std::vector<Coeff_t> diag(Ls,1.0);
|
||||
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
|
||||
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = mq1 + shiftm;
|
||||
|
||||
#if(0)
|
||||
std::cout << GridLogMessage << "DomainWallEOFAFermion::M5Ddag(FF&,FF&):" << std::endl;
|
||||
for(int i=0; i<diag.size(); ++i){
|
||||
std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
|
||||
}
|
||||
for(int i=0; i<upper.size(); ++i){
|
||||
std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
|
||||
}
|
||||
for(int i=0; i<lower.size(); ++i){
|
||||
std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
this->M5Ddag(psi, chi, chi, lower, diag, upper);
|
||||
}
|
||||
|
||||
// half checkerboard operations
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<Coeff_t> diag = this->bee;
|
||||
std::vector<Coeff_t> upper(Ls);
|
||||
std::vector<Coeff_t> lower(Ls);
|
||||
|
||||
for(int s=0; s<Ls; s++){
|
||||
upper[s] = -this->cee[s];
|
||||
lower[s] = -this->cee[s];
|
||||
}
|
||||
upper[Ls-1] = this->dm;
|
||||
lower[0] = this->dp;
|
||||
|
||||
this->M5D(psi, psi, chi, lower, diag, upper);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<Coeff_t> diag = this->bee;
|
||||
std::vector<Coeff_t> upper(Ls);
|
||||
std::vector<Coeff_t> lower(Ls);
|
||||
|
||||
for(int s=0; s<Ls; s++){
|
||||
upper[s] = -this->cee[s];
|
||||
lower[s] = -this->cee[s];
|
||||
}
|
||||
upper[Ls-1] = this->dp;
|
||||
lower[0] = this->dm;
|
||||
|
||||
this->M5Ddag(psi, psi, chi, lower, diag, upper);
|
||||
}
|
||||
|
||||
/****************************************************************************************/
|
||||
|
||||
//Zolo
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int pm = this->pm;
|
||||
RealD mq1 = this->mq1;
|
||||
RealD mq2 = this->mq2;
|
||||
RealD mq3 = this->mq3;
|
||||
RealD shift = this->shift;
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Constants for the preconditioned matrix Cayley form
|
||||
////////////////////////////////////////////////////////
|
||||
this->bs.resize(Ls);
|
||||
this->cs.resize(Ls);
|
||||
this->aee.resize(Ls);
|
||||
this->aeo.resize(Ls);
|
||||
this->bee.resize(Ls);
|
||||
this->beo.resize(Ls);
|
||||
this->cee.resize(Ls);
|
||||
this->ceo.resize(Ls);
|
||||
|
||||
for(int i=0; i<Ls; ++i){
|
||||
this->bee[i] = 4.0 - this->M5 + 1.0;
|
||||
this->cee[i] = 1.0;
|
||||
}
|
||||
|
||||
for(int i=0; i<Ls; ++i){
|
||||
this->aee[i] = this->cee[i];
|
||||
this->bs[i] = this->beo[i] = 1.0;
|
||||
this->cs[i] = this->ceo[i] = 0.0;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// EOFA shift terms
|
||||
//////////////////////////////////////////
|
||||
if(pm == 1){
|
||||
this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
|
||||
this->dm = mq1*this->cee[Ls-1];
|
||||
} else if(this->pm == -1) {
|
||||
this->dp = mq1*this->cee[0];
|
||||
this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
|
||||
} else {
|
||||
this->dp = mq1*this->cee[0];
|
||||
this->dm = mq1*this->cee[Ls-1];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// LDU decomposition of eeoo
|
||||
//////////////////////////////////////////
|
||||
this->dee.resize(Ls+1);
|
||||
this->lee.resize(Ls);
|
||||
this->leem.resize(Ls);
|
||||
this->uee.resize(Ls);
|
||||
this->ueem.resize(Ls);
|
||||
|
||||
for(int i=0; i<Ls; ++i){
|
||||
|
||||
if(i < Ls-1){
|
||||
|
||||
this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
|
||||
|
||||
this->leem[i] = this->dm/this->bee[i];
|
||||
for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
|
||||
|
||||
this->dee[i] = this->bee[i];
|
||||
|
||||
this->uee[i] = -this->aee[i]/this->bee[i]; // up-diag entry on the ith row
|
||||
|
||||
this->ueem[i] = this->dp / this->bee[0];
|
||||
for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
|
||||
|
||||
} else {
|
||||
|
||||
this->lee[i] = 0.0;
|
||||
this->leem[i] = 0.0;
|
||||
this->uee[i] = 0.0;
|
||||
this->ueem[i] = 0.0;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
Coeff_t delta_d = 1.0 / this->bee[0];
|
||||
for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
|
||||
this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
|
||||
this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
|
||||
}
|
||||
|
||||
int inv = 1;
|
||||
this->MooeeInternalCompute(0, inv, this->MatpInv, this->MatmInv);
|
||||
this->MooeeInternalCompute(1, inv, this->MatpInvDag, this->MatmInvDag);
|
||||
}
|
||||
|
||||
// Recompute Cayley-form coefficients for different shift
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
|
||||
{
|
||||
this->shift = new_shift;
|
||||
Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
|
||||
this->SetCoefficientsTanh(zdata, 1.0, 0.0);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
|
||||
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
GridBase* grid = this->FermionRedBlackGrid();
|
||||
int LLs = grid->_rdimensions[0];
|
||||
|
||||
if(LLs == Ls){ return; } // Not vectorised in 5th direction
|
||||
|
||||
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||
|
||||
for(int s=0; s<Ls; s++){
|
||||
Pplus(s,s) = this->bee[s];
|
||||
Pminus(s,s) = this->bee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pminus(s,s+1) = -this->cee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pplus(s+1,s) = -this->cee[s+1];
|
||||
}
|
||||
|
||||
Pplus (0,Ls-1) = this->dp;
|
||||
Pminus(Ls-1,0) = this->dm;
|
||||
|
||||
Eigen::MatrixXcd PplusMat ;
|
||||
Eigen::MatrixXcd PminusMat;
|
||||
|
||||
#if(0)
|
||||
std::cout << GridLogMessage << "Pplus:" << std::endl;
|
||||
for(int s=0; s<Ls; ++s){
|
||||
for(int ss=0; ss<Ls; ++ss){
|
||||
std::cout << Pplus(s,ss) << "\t";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << "Pminus:" << std::endl;
|
||||
for(int s=0; s<Ls; ++s){
|
||||
for(int ss=0; ss<Ls; ++ss){
|
||||
std::cout << Pminus(s,ss) << "\t";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
if(inv) {
|
||||
PplusMat = Pplus.inverse();
|
||||
PminusMat = Pminus.inverse();
|
||||
} else {
|
||||
PplusMat = Pplus;
|
||||
PminusMat = Pminus;
|
||||
}
|
||||
|
||||
if(dag){
|
||||
PplusMat.adjointInPlace();
|
||||
PminusMat.adjointInPlace();
|
||||
}
|
||||
|
||||
typedef typename SiteHalfSpinor::scalar_type scalar_type;
|
||||
const int Nsimd = Simd::Nsimd();
|
||||
Matp.resize(Ls*LLs);
|
||||
Matm.resize(Ls*LLs);
|
||||
|
||||
for(int s2=0; s2<Ls; s2++){
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
int istride = LLs;
|
||||
int ostride = 1;
|
||||
Simd Vp;
|
||||
Simd Vm;
|
||||
scalar_type *sp = (scalar_type*) &Vp;
|
||||
scalar_type *sm = (scalar_type*) &Vm;
|
||||
for(int l=0; l<Nsimd; l++){
|
||||
if(switcheroo<Coeff_t>::iscomplex()) {
|
||||
sp[l] = PplusMat (l*istride+s1*ostride,s2);
|
||||
sm[l] = PminusMat(l*istride+s1*ostride,s2);
|
||||
} else {
|
||||
// if real
|
||||
scalar_type tmp;
|
||||
tmp = PplusMat (l*istride+s1*ostride,s2);
|
||||
sp[l] = scalar_type(tmp.real(),tmp.real());
|
||||
tmp = PminusMat(l*istride+s1*ostride,s2);
|
||||
sm[l] = scalar_type(tmp.real(),tmp.real());
|
||||
}
|
||||
}
|
||||
Matp[LLs*s2+s1] = Vp;
|
||||
Matm[LLs*s2+s1] = Vm;
|
||||
}}
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(DomainWallEOFAFermion);
|
||||
GparityFermOpTemplateInstantiate(DomainWallEOFAFermion);
|
||||
|
||||
}}
|
@ -1,115 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
|
||||
#define GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
public:
|
||||
// Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
|
||||
// for red-black preconditioned Shamir EOFA
|
||||
Coeff_t dm;
|
||||
Coeff_t dp;
|
||||
|
||||
virtual void Instantiatable(void) {};
|
||||
|
||||
// EOFA-specific operations
|
||||
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
|
||||
virtual void Dtilde (const FermionField& in, FermionField& out);
|
||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField& in, FermionField& out);
|
||||
virtual RealD Mdag (const FermionField& in, FermionField& out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeDag (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInv (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInvDag(const FermionField& in, FermionField& out);
|
||||
|
||||
virtual void M5D (const FermionField& psi, FermionField& chi);
|
||||
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Instantiate different versions depending on Impl
|
||||
/////////////////////////////////////////////////////
|
||||
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
|
||||
|
||||
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
|
||||
|
||||
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
|
||||
|
||||
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
|
||||
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
|
||||
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
virtual void RefreshShiftCoefficients(RealD new_shift);
|
||||
|
||||
// Constructors
|
||||
DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
|
||||
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
|
||||
RealD _M5, const ImplParams& p=ImplParams());
|
||||
|
||||
protected:
|
||||
void SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c);
|
||||
};
|
||||
}}
|
||||
|
||||
#define INSTANTIATE_DPERP_DWF_EOFA(A)\
|
||||
template void DomainWallEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
|
||||
template void DomainWallEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
|
||||
template void DomainWallEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
|
||||
template void DomainWallEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi);
|
||||
|
||||
#undef DOMAIN_WALL_EOFA_DPERP_DENSE
|
||||
#define DOMAIN_WALL_EOFA_DPERP_CACHE
|
||||
#undef DOMAIN_WALL_EOFA_DPERP_LINALG
|
||||
#define DOMAIN_WALL_EOFA_DPERP_VEC
|
||||
|
||||
#endif
|
@ -1,248 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
|
||||
// Pminus fowards
|
||||
// Pplus backwards..
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
GridBase* grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||
for(int s=0; s<Ls; s++){
|
||||
auto tmp = psi._odata[0];
|
||||
if(s==0) {
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5m(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
GridBase* grid = psi._grid;
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard=psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||
auto tmp = psi._odata[0];
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5p(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||
|
||||
auto tmp1 = psi._odata[0];
|
||||
auto tmp2 = psi._odata[0];
|
||||
|
||||
// flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops
|
||||
// Apply (L^{\prime})^{-1}
|
||||
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5p(tmp1, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
spProj5m(tmp1, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
|
||||
spProj5p(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1;
|
||||
}
|
||||
spProj5m(tmp2, chi[ss+Ls-1]);
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2;
|
||||
|
||||
// Apply U^{-1}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
spProj5m(tmp1, chi[ss+s+1]);
|
||||
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
assert(psi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
std::vector<Coeff_t> ueec(Ls);
|
||||
std::vector<Coeff_t> deec(Ls+1);
|
||||
std::vector<Coeff_t> leec(Ls);
|
||||
std::vector<Coeff_t> ueemc(Ls);
|
||||
std::vector<Coeff_t> leemc(Ls);
|
||||
|
||||
for(int s=0; s<ueec.size(); s++){
|
||||
ueec[s] = conjugate(this->uee[s]);
|
||||
deec[s] = conjugate(this->dee[s]);
|
||||
leec[s] = conjugate(this->lee[s]);
|
||||
ueemc[s] = conjugate(this->ueem[s]);
|
||||
leemc[s] = conjugate(this->leem[s]);
|
||||
}
|
||||
deec[Ls] = conjugate(this->dee[Ls]);
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
|
||||
|
||||
auto tmp1 = psi._odata[0];
|
||||
auto tmp2 = psi._odata[0];
|
||||
|
||||
// Apply (U^{\prime})^{-dagger}
|
||||
chi[ss] = psi[ss];
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5m(tmp1, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - ueec[s-1]*tmp1;
|
||||
}
|
||||
|
||||
// U_m^{-\dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5p(tmp1, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp1;
|
||||
}
|
||||
|
||||
// L_m^{-\dagger} D^{-dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5m(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/deec[s])*chi[ss+s] - (leemc[s]/deec[Ls-1])*tmp1;
|
||||
}
|
||||
spProj5p(tmp2, chi[ss+Ls-1]);
|
||||
chi[ss+Ls-1] = (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2;
|
||||
|
||||
// Apply L^{-dagger}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
spProj5p(tmp1, chi[ss+s+1]);
|
||||
chi[ss+s] = chi[ss+s] - leec[s]*tmp1;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
#ifdef DOMAIN_WALL_EOFA_DPERP_CACHE
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,159 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/*
|
||||
* Dense matrix versions of routines
|
||||
*/
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi._grid->_rdimensions[0];
|
||||
int vol = psi._grid->oSites()/LLs;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
assert(Ls==LLs);
|
||||
|
||||
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||
|
||||
for(int s=0;s<Ls;s++){
|
||||
Pplus(s,s) = this->bee[s];
|
||||
Pminus(s,s) = this->bee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pminus(s,s+1) = -this->cee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pplus(s+1,s) = -this->cee[s+1];
|
||||
}
|
||||
|
||||
Pplus (0,Ls-1) = this->dp;
|
||||
Pminus(Ls-1,0) = this->dm;
|
||||
|
||||
Eigen::MatrixXd PplusMat ;
|
||||
Eigen::MatrixXd PminusMat;
|
||||
|
||||
if(inv) {
|
||||
PplusMat = Pplus.inverse();
|
||||
PminusMat = Pminus.inverse();
|
||||
} else {
|
||||
PplusMat = Pplus;
|
||||
PminusMat = Pminus;
|
||||
}
|
||||
|
||||
if(dag){
|
||||
PplusMat.adjointInPlace();
|
||||
PminusMat.adjointInPlace();
|
||||
}
|
||||
|
||||
// For the non-vectorised s-direction this is simple
|
||||
|
||||
for(auto site=0; site<vol; site++){
|
||||
|
||||
SiteSpinor SiteChi;
|
||||
SiteHalfSpinor SitePplus;
|
||||
SiteHalfSpinor SitePminus;
|
||||
|
||||
for(int s1=0; s1<Ls; s1++){
|
||||
SiteChi = zero;
|
||||
for(int s2=0; s2<Ls; s2++){
|
||||
int lex2 = s2 + Ls*site;
|
||||
if(PplusMat(s1,s2) != 0.0){
|
||||
spProj5p(SitePplus,psi[lex2]);
|
||||
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
|
||||
}
|
||||
if(PminusMat(s1,s2) != 0.0){
|
||||
spProj5m(SitePminus, psi[lex2]);
|
||||
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
|
||||
}
|
||||
}
|
||||
chi[s1+Ls*site] = SiteChi*0.5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
|
||||
|
||||
template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
|
||||
|
||||
template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,168 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
// Pminus fowards
|
||||
// Pplus backwards
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField tmp(psi._grid);
|
||||
|
||||
// Apply (L^{\prime})^{-1}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
|
||||
axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply U^{-1}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField tmp(psi._grid);
|
||||
|
||||
// Apply (U^{\prime})^{-dagger}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
|
||||
}
|
||||
|
||||
// U_m^{-\dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// L_m^{-\dagger} D^{-dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
|
||||
axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply L^{-dagger}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,605 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/*
|
||||
* Dense matrix versions of routines
|
||||
*/
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd> > u(LLs);
|
||||
Vector<iSinglet<Simd> > l(LLs);
|
||||
Vector<iSinglet<Simd> > d(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
|
||||
for(int o=0;o<LLs;o++){ // outer
|
||||
for(int i=0;i<nsimd;i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
assert(Nc == 3);
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
#if 0
|
||||
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
alignas(64) SiteHalfSpinor hm;
|
||||
alignas(64) SiteSpinor fp;
|
||||
alignas(64) SiteSpinor fm;
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
int vp = (v+1)%LLs;
|
||||
int vm = (v+LLs-1)%LLs;
|
||||
|
||||
spProj5m(hp, psi[ss+vp]);
|
||||
spProj5p(hm, psi[ss+vm]);
|
||||
|
||||
if (vp <= v){ rotate(hp, hp, 1); }
|
||||
if (vm >= v){ rotate(hm, hm, nsimd-1); }
|
||||
|
||||
hp = 0.5*hp;
|
||||
hm = 0.5*hm;
|
||||
|
||||
spRecon5m(fp, hp);
|
||||
spRecon5p(fm, hm);
|
||||
|
||||
chi[ss+v] = d[v]*phi[ss+v];
|
||||
chi[ss+v] = chi[ss+v] + u[v]*fp;
|
||||
chi[ss+v] = chi[ss+v] + l[v]*fm;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v==LLs-1) ? 0 : v+1;
|
||||
int vm = (v==0) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(2)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(2)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(2)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(3)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(3)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(3)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(0)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(0)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(0)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(1)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(1)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(1)(2);
|
||||
|
||||
if(vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
// Can force these to real arithmetic and save 2x.
|
||||
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
|
||||
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
|
||||
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
|
||||
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
|
||||
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
|
||||
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
|
||||
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd> > u(LLs);
|
||||
Vector<iSinglet<Simd> > l(LLs);
|
||||
Vector<iSinglet<Simd> > d(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
#if 0
|
||||
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
alignas(64) SiteHalfSpinor hm;
|
||||
alignas(64) SiteSpinor fp;
|
||||
alignas(64) SiteSpinor fm;
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
int vp = (v+1)%LLs;
|
||||
int vm = (v+LLs-1)%LLs;
|
||||
|
||||
spProj5p(hp, psi[ss+vp]);
|
||||
spProj5m(hm, psi[ss+vm]);
|
||||
|
||||
if(vp <= v){ rotate(hp, hp, 1); }
|
||||
if(vm >= v){ rotate(hm, hm, nsimd-1); }
|
||||
|
||||
hp = hp*0.5;
|
||||
hm = hm*0.5;
|
||||
spRecon5p(fp, hp);
|
||||
spRecon5m(fm, hm);
|
||||
|
||||
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
||||
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0 ) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(0)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(0)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(0)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(1)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(1)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(1)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(2)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(2)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(2)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(3)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(3)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(3)(2);
|
||||
|
||||
if (vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
|
||||
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
|
||||
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
|
||||
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
|
||||
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
|
||||
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
#ifdef AVX512
|
||||
#include<simd/Intel512common.h>
|
||||
#include<simd/Intel512avx.h>
|
||||
#include<simd/Intel512single.h>
|
||||
#endif
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
#ifndef AVX512
|
||||
{
|
||||
SiteHalfSpinor BcastP;
|
||||
SiteHalfSpinor BcastM;
|
||||
SiteHalfSpinor SiteChiP;
|
||||
SiteHalfSpinor SiteChiM;
|
||||
|
||||
// Ls*Ls * 2 * 12 * vol flops
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
|
||||
for(int s2=0; s2<LLs; s2++){
|
||||
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
|
||||
|
||||
int s = s2 + l*LLs;
|
||||
int lex = s2 + LLs*site;
|
||||
|
||||
if( s2==0 && l==0 ){
|
||||
SiteChiP=zero;
|
||||
SiteChiM=zero;
|
||||
}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
|
||||
}}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
|
||||
}}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
|
||||
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
|
||||
}}
|
||||
}}
|
||||
|
||||
{
|
||||
int lex = s1 + LLs*site;
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
|
||||
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
|
||||
}}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
{
|
||||
// pointers
|
||||
// MASK_REGS;
|
||||
#define Chi_00 %%zmm1
|
||||
#define Chi_01 %%zmm2
|
||||
#define Chi_02 %%zmm3
|
||||
#define Chi_10 %%zmm4
|
||||
#define Chi_11 %%zmm5
|
||||
#define Chi_12 %%zmm6
|
||||
#define Chi_20 %%zmm7
|
||||
#define Chi_21 %%zmm8
|
||||
#define Chi_22 %%zmm9
|
||||
#define Chi_30 %%zmm10
|
||||
#define Chi_31 %%zmm11
|
||||
#define Chi_32 %%zmm12
|
||||
|
||||
#define BCAST0 %%zmm13
|
||||
#define BCAST1 %%zmm14
|
||||
#define BCAST2 %%zmm15
|
||||
#define BCAST3 %%zmm16
|
||||
#define BCAST4 %%zmm17
|
||||
#define BCAST5 %%zmm18
|
||||
#define BCAST6 %%zmm19
|
||||
#define BCAST7 %%zmm20
|
||||
#define BCAST8 %%zmm21
|
||||
#define BCAST9 %%zmm22
|
||||
#define BCAST10 %%zmm23
|
||||
#define BCAST11 %%zmm24
|
||||
|
||||
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
|
||||
for(int s2=0; s2<LLs; s2++){
|
||||
|
||||
int lex = s2 + LLs*site;
|
||||
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
|
||||
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
|
||||
uint64_t a2 = (uint64_t) &psi[lex];
|
||||
|
||||
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
|
||||
if((s2+l)==0) {
|
||||
asm(
|
||||
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
|
||||
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
|
||||
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
|
||||
VBCASTCDUP(0,%2,BCAST0)
|
||||
VBCASTCDUP(1,%2,BCAST1)
|
||||
VBCASTCDUP(2,%2,BCAST2)
|
||||
VBCASTCDUP(3,%2,BCAST3)
|
||||
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
|
||||
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
|
||||
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
|
||||
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
|
||||
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
|
||||
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
|
||||
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
|
||||
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
|
||||
VMULMEM(0,%1,BCAST8,Chi_22)
|
||||
VMULMEM(0,%1,BCAST9,Chi_30)
|
||||
VMULMEM(0,%1,BCAST10,Chi_31)
|
||||
VMULMEM(0,%1,BCAST11,Chi_32)
|
||||
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||
} else {
|
||||
asm(
|
||||
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
|
||||
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
|
||||
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
|
||||
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
|
||||
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
|
||||
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
|
||||
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
|
||||
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
|
||||
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
|
||||
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
|
||||
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
|
||||
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
|
||||
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||
}
|
||||
a0 = a0 + incr;
|
||||
a1 = a1 + incr;
|
||||
a2 = a2 + sizeof(typename Simd::scalar_type);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int lexa = s1+LLs*site;
|
||||
asm (
|
||||
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
|
||||
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
|
||||
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
|
||||
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
|
||||
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef Chi_00
|
||||
#undef Chi_01
|
||||
#undef Chi_02
|
||||
#undef Chi_10
|
||||
#undef Chi_11
|
||||
#undef Chi_12
|
||||
#undef Chi_20
|
||||
#undef Chi_21
|
||||
#undef Chi_22
|
||||
#undef Chi_30
|
||||
#undef Chi_31
|
||||
#undef Chi_32
|
||||
|
||||
#undef BCAST0
|
||||
#undef BCAST1
|
||||
#undef BCAST2
|
||||
#undef BCAST3
|
||||
#undef BCAST4
|
||||
#undef BCAST5
|
||||
#undef BCAST6
|
||||
#undef BCAST7
|
||||
#undef BCAST8
|
||||
#undef BCAST9
|
||||
#undef BCAST10
|
||||
#undef BCAST11
|
||||
#endif
|
||||
};
|
||||
|
||||
// Z-mobius version
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
|
||||
exit(-1);
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi._grid->_rdimensions[0];
|
||||
int vol = psi._grid->oSites()/LLs;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
Vector<iSinglet<Simd> > Matp;
|
||||
Vector<iSinglet<Simd> > Matm;
|
||||
Vector<iSinglet<Simd> > *_Matp;
|
||||
Vector<iSinglet<Simd> > *_Matm;
|
||||
|
||||
// MooeeInternalCompute(dag,inv,Matp,Matm);
|
||||
if(inv && dag){
|
||||
_Matp = &this->MatpInvDag;
|
||||
_Matm = &this->MatmInvDag;
|
||||
}
|
||||
|
||||
if(inv && (!dag)){
|
||||
_Matp = &this->MatpInv;
|
||||
_Matm = &this->MatmInv;
|
||||
}
|
||||
|
||||
if(!inv){
|
||||
MooeeInternalCompute(dag, inv, Matp, Matm);
|
||||
_Matp = &Matp;
|
||||
_Matm = &Matm;
|
||||
}
|
||||
|
||||
assert(_Matp->size() == Ls*LLs);
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
if(switcheroo<Coeff_t>::iscomplex()){
|
||||
parallel_for(auto site=0; site<vol; site++){
|
||||
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
|
||||
}
|
||||
} else {
|
||||
parallel_for(auto site=0; site<vol; site++){
|
||||
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
#ifdef DOMAIN_WALL_EOFA_DPERP_VEC
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF);
|
||||
|
||||
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF);
|
||||
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH);
|
||||
|
||||
template void DomainWallEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
template void DomainWallEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void DomainWallEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,237 +0,0 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/FourierAcceleratedPV.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christoph Lehner (lifted with permission by Peter Boyle, brought back to Grid)
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<typename M>
|
||||
void get_real_const_bc(M& m, RealD& _b, RealD& _c) {
|
||||
ComplexD b,c;
|
||||
b=m.bs[0];
|
||||
c=m.cs[0];
|
||||
std::cout << GridLogMessage << "b=" << b << ", c=" << c << std::endl;
|
||||
for (size_t i=1;i<m.bs.size();i++) {
|
||||
assert(m.bs[i] == b);
|
||||
assert(m.cs[i] == c);
|
||||
}
|
||||
assert(b.imag() == 0.0);
|
||||
assert(c.imag() == 0.0);
|
||||
_b = b.real();
|
||||
_c = c.real();
|
||||
}
|
||||
|
||||
|
||||
template<typename Vi, typename M, typename G>
|
||||
class FourierAcceleratedPV {
|
||||
public:
|
||||
|
||||
ConjugateGradient<Vi> &cg;
|
||||
M& dwfPV;
|
||||
G& Umu;
|
||||
GridCartesian* grid5D;
|
||||
GridRedBlackCartesian* gridRB5D;
|
||||
int group_in_s;
|
||||
|
||||
FourierAcceleratedPV(M& _dwfPV, G& _Umu, ConjugateGradient<Vi> &_cg, int _group_in_s = 2)
|
||||
: dwfPV(_dwfPV), Umu(_Umu), cg(_cg), group_in_s(_group_in_s)
|
||||
{
|
||||
assert( dwfPV.FermionGrid()->_fdimensions[0] % (2*group_in_s) == 0);
|
||||
grid5D = QCD::SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu._grid);
|
||||
gridRB5D = QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu._grid);
|
||||
}
|
||||
|
||||
void rotatePV(const Vi& _src, Vi& dst, bool forward) const {
|
||||
|
||||
GridStopWatch gsw1, gsw2;
|
||||
|
||||
typedef typename Vi::scalar_type Coeff_t;
|
||||
int Ls = dst._grid->_fdimensions[0];
|
||||
|
||||
Vi _tmp(dst._grid);
|
||||
double phase = M_PI / (double)Ls;
|
||||
Coeff_t bzero(0.0,0.0);
|
||||
|
||||
FFT theFFT((GridCartesian*)dst._grid);
|
||||
|
||||
if (!forward) {
|
||||
gsw1.Start();
|
||||
for (int s=0;s<Ls;s++) {
|
||||
Coeff_t a(::cos(phase*s),-::sin(phase*s));
|
||||
axpby_ssp(_tmp,a,_src,bzero,_src,s,s);
|
||||
}
|
||||
gsw1.Stop();
|
||||
|
||||
gsw2.Start();
|
||||
theFFT.FFT_dim(dst,_tmp,0,FFT::forward);
|
||||
gsw2.Stop();
|
||||
|
||||
} else {
|
||||
|
||||
gsw2.Start();
|
||||
theFFT.FFT_dim(_tmp,_src,0,FFT::backward);
|
||||
gsw2.Stop();
|
||||
|
||||
gsw1.Start();
|
||||
for (int s=0;s<Ls;s++) {
|
||||
Coeff_t a(::cos(phase*s),::sin(phase*s));
|
||||
axpby_ssp(dst,a,_tmp,bzero,_tmp,s,s);
|
||||
}
|
||||
gsw1.Stop();
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Timing rotatePV: " << gsw1.Elapsed() << ", " << gsw2.Elapsed() << std::endl;
|
||||
|
||||
}
|
||||
|
||||
void pvInv(const Vi& _src, Vi& _dst) const {
|
||||
|
||||
std::cout << GridLogMessage << "Fourier-Accelerated Outer Pauli Villars"<<std::endl;
|
||||
|
||||
typedef typename Vi::scalar_type Coeff_t;
|
||||
int Ls = _dst._grid->_fdimensions[0];
|
||||
|
||||
GridStopWatch gswT;
|
||||
gswT.Start();
|
||||
|
||||
RealD b,c;
|
||||
get_real_const_bc(dwfPV,b,c);
|
||||
RealD M5 = dwfPV.M5;
|
||||
|
||||
// U(true) Rightinv TMinv U(false) = Minv
|
||||
|
||||
Vi _src_diag(_dst._grid);
|
||||
Vi _src_diag_slice(dwfPV.GaugeGrid());
|
||||
Vi _dst_diag_slice(dwfPV.GaugeGrid());
|
||||
Vi _src_diag_slices(grid5D);
|
||||
Vi _dst_diag_slices(grid5D);
|
||||
Vi _dst_diag(_dst._grid);
|
||||
|
||||
rotatePV(_src,_src_diag,false);
|
||||
|
||||
// now do TM solves
|
||||
Gamma G5(Gamma::Algebra::Gamma5);
|
||||
|
||||
GridStopWatch gswA, gswB;
|
||||
|
||||
gswA.Start();
|
||||
|
||||
typedef typename M::Impl_t Impl;
|
||||
//WilsonTMFermion<Impl> tm(x.Umu,*x.UGridF,*x.UrbGridF,0.0,0.0,solver_outer.parent.par.wparams_f);
|
||||
std::vector<RealD> vmass(grid5D->_fdimensions[0],0.0);
|
||||
std::vector<RealD> vmu(grid5D->_fdimensions[0],0.0);
|
||||
|
||||
WilsonTMFermion5D<Impl> tm(Umu,*grid5D,*gridRB5D,
|
||||
*(GridCartesian*)dwfPV.GaugeGrid(),
|
||||
*(GridRedBlackCartesian*)dwfPV.GaugeRedBlackGrid(),
|
||||
vmass,vmu);
|
||||
|
||||
//SchurRedBlackDiagTwoSolve<Vi> sol(cg);
|
||||
SchurRedBlackDiagMooeeSolve<Vi> sol(cg); // same performance as DiagTwo
|
||||
gswA.Stop();
|
||||
|
||||
gswB.Start();
|
||||
|
||||
for (int sgroup=0;sgroup<Ls/2/group_in_s;sgroup++) {
|
||||
|
||||
for (int sidx=0;sidx<group_in_s;sidx++) {
|
||||
|
||||
int s = sgroup*group_in_s + sidx;
|
||||
int sprime = Ls-s-1;
|
||||
|
||||
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
|
||||
RealD cosp = ::cos(phase);
|
||||
RealD sinp = ::sin(phase);
|
||||
RealD denom = b*b + c*c + 2.0*b*c*cosp;
|
||||
RealD mass = -(b*b*M5 + c*(1.0 - cosp + c*M5) + b*(-1.0 + cosp + 2.0*c*cosp*M5))/denom;
|
||||
RealD mu = (b+c)*sinp/denom;
|
||||
|
||||
vmass[2*sidx + 0] = mass;
|
||||
vmass[2*sidx + 1] = mass;
|
||||
vmu[2*sidx + 0] = mu;
|
||||
vmu[2*sidx + 1] = -mu;
|
||||
|
||||
}
|
||||
|
||||
tm.update(vmass,vmu);
|
||||
|
||||
for (int sidx=0;sidx<group_in_s;sidx++) {
|
||||
|
||||
int s = sgroup*group_in_s + sidx;
|
||||
int sprime = Ls-s-1;
|
||||
|
||||
ExtractSlice(_src_diag_slice,_src_diag,s,0);
|
||||
InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 0,0);
|
||||
|
||||
ExtractSlice(_src_diag_slice,_src_diag,sprime,0);
|
||||
InsertSlice(_src_diag_slice,_src_diag_slices,2*sidx + 1,0);
|
||||
|
||||
}
|
||||
|
||||
GridStopWatch gsw;
|
||||
gsw.Start();
|
||||
_dst_diag_slices = zero; // zero guess
|
||||
sol(tm,_src_diag_slices,_dst_diag_slices);
|
||||
gsw.Stop();
|
||||
std::cout << GridLogMessage << "Solve[sgroup=" << sgroup << "] completed in " << gsw.Elapsed() << ", " << gswA.Elapsed() << std::endl;
|
||||
|
||||
for (int sidx=0;sidx<group_in_s;sidx++) {
|
||||
|
||||
int s = sgroup*group_in_s + sidx;
|
||||
int sprime = Ls-s-1;
|
||||
|
||||
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
|
||||
RealD cosp = ::cos(phase);
|
||||
RealD sinp = ::sin(phase);
|
||||
|
||||
// now rotate with inverse of
|
||||
Coeff_t pA = b + c*cosp;
|
||||
Coeff_t pB = - Coeff_t(0.0,1.0)*c*sinp;
|
||||
Coeff_t pABden = pA*pA - pB*pB;
|
||||
// (pA + pB * G5) * (pA - pB*G5) = (pA^2 - pB^2)
|
||||
|
||||
ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 0,0);
|
||||
_dst_diag_slice = (pA/pABden) * _dst_diag_slice - (pB/pABden) * (G5 * _dst_diag_slice);
|
||||
InsertSlice(_dst_diag_slice,_dst_diag,s,0);
|
||||
|
||||
ExtractSlice(_dst_diag_slice,_dst_diag_slices,2*sidx + 1,0);
|
||||
_dst_diag_slice = (pA/pABden) * _dst_diag_slice + (pB/pABden) * (G5 * _dst_diag_slice);
|
||||
InsertSlice(_dst_diag_slice,_dst_diag,sprime,0);
|
||||
}
|
||||
}
|
||||
gswB.Stop();
|
||||
|
||||
rotatePV(_dst_diag,_dst,true);
|
||||
|
||||
gswT.Stop();
|
||||
std::cout << GridLogMessage << "PV completed in " << gswT.Elapsed() << " (Setup: " << gswA.Elapsed() << ", s-loop: " << gswB.Elapsed() << ")" << std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
}}
|
@ -1,193 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/MADWF.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template <class Fieldi, class Fieldo,IfNotSame<Fieldi,Fieldo> X=0>
|
||||
inline void convert(const Fieldi &from,Fieldo &to)
|
||||
{
|
||||
precisionChange(to,from);
|
||||
}
|
||||
template <class Fieldi, class Fieldo,IfSame<Fieldi,Fieldo> X=0>
|
||||
inline void convert(const Fieldi &from,Fieldo &to)
|
||||
{
|
||||
to=from;
|
||||
}
|
||||
|
||||
template<class Matrixo,class Matrixi,class PVinverter,class SchurSolver, class Guesser>
|
||||
class MADWF
|
||||
{
|
||||
private:
|
||||
typedef typename Matrixo::FermionField FermionFieldo;
|
||||
typedef typename Matrixi::FermionField FermionFieldi;
|
||||
|
||||
PVinverter & PauliVillarsSolvero;// For the outer field
|
||||
SchurSolver & SchurSolveri; // For the inner approx field
|
||||
Guesser & Guesseri; // To deflate the inner approx solves
|
||||
|
||||
Matrixo & Mato; // Action object for outer
|
||||
Matrixi & Mati; // Action object for inner
|
||||
|
||||
RealD target_resid;
|
||||
int maxiter;
|
||||
public:
|
||||
|
||||
MADWF(Matrixo &_Mato,
|
||||
Matrixi &_Mati,
|
||||
PVinverter &_PauliVillarsSolvero,
|
||||
SchurSolver &_SchurSolveri,
|
||||
Guesser & _Guesseri,
|
||||
RealD resid,
|
||||
int _maxiter) :
|
||||
|
||||
Mato(_Mato),Mati(_Mati),
|
||||
SchurSolveri(_SchurSolveri),
|
||||
PauliVillarsSolvero(_PauliVillarsSolvero),Guesseri(_Guesseri)
|
||||
{
|
||||
target_resid=resid;
|
||||
maxiter =_maxiter;
|
||||
};
|
||||
|
||||
void operator() (const FermionFieldo &src4,FermionFieldo &sol5)
|
||||
{
|
||||
std::cout << GridLogMessage<< " ************************************************" << std::endl;
|
||||
std::cout << GridLogMessage<< " MADWF-like algorithm " << std::endl;
|
||||
std::cout << GridLogMessage<< " ************************************************" << std::endl;
|
||||
|
||||
FermionFieldi c0i(Mati.GaugeGrid()); // 4d
|
||||
FermionFieldi y0i(Mati.GaugeGrid()); // 4d
|
||||
FermionFieldo c0 (Mato.GaugeGrid()); // 4d
|
||||
FermionFieldo y0 (Mato.GaugeGrid()); // 4d
|
||||
|
||||
FermionFieldo A(Mato.FermionGrid()); // Temporary outer
|
||||
FermionFieldo B(Mato.FermionGrid()); // Temporary outer
|
||||
FermionFieldo b(Mato.FermionGrid()); // 5d source
|
||||
|
||||
FermionFieldo c(Mato.FermionGrid()); // PVinv source; reused so store
|
||||
FermionFieldo defect(Mato.FermionGrid()); // 5d source
|
||||
|
||||
FermionFieldi ci(Mati.FermionGrid());
|
||||
FermionFieldi yi(Mati.FermionGrid());
|
||||
FermionFieldi xi(Mati.FermionGrid());
|
||||
FermionFieldi srci(Mati.FermionGrid());
|
||||
FermionFieldi Ai(Mati.FermionGrid());
|
||||
|
||||
RealD m=Mati.Mass();
|
||||
|
||||
///////////////////////////////////////
|
||||
//Import source, include Dminus factors
|
||||
///////////////////////////////////////
|
||||
Mato.ImportPhysicalFermionSource(src4,b);
|
||||
std::cout << GridLogMessage << " src4 " <<norm2(src4)<<std::endl;
|
||||
std::cout << GridLogMessage << " b " <<norm2(b)<<std::endl;
|
||||
|
||||
defect = b;
|
||||
sol5=zero;
|
||||
for (int i=0;i<maxiter;i++) {
|
||||
|
||||
///////////////////////////////////////
|
||||
// Set up c0 from current defect
|
||||
///////////////////////////////////////
|
||||
PauliVillarsSolvero(Mato,defect,A);
|
||||
Mato.Pdag(A,c);
|
||||
ExtractSlice(c0, c, 0 , 0);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Solve the inner system with surface term c0
|
||||
////////////////////////////////////////////////
|
||||
ci = zero;
|
||||
convert(c0,c0i); // Possible precison change
|
||||
InsertSlice(c0i,ci,0, 0);
|
||||
|
||||
// Dwm P y = Dwm x = D(1) P (c0,0,0,0)^T
|
||||
Mati.P(ci,Ai);
|
||||
Mati.SetMass(1.0); Mati.M(Ai,srci); Mati.SetMass(m);
|
||||
SchurSolveri(Mati,srci,xi,Guesseri);
|
||||
Mati.Pdag(xi,yi);
|
||||
ExtractSlice(y0i, yi, 0 , 0);
|
||||
convert(y0i,y0); // Possible precision change
|
||||
|
||||
//////////////////////////////////////
|
||||
// Propagate solution back to outer system
|
||||
// Build Pdag PV^-1 Dm P [-sol4,c2,c3... cL]
|
||||
//////////////////////////////////////
|
||||
c0 = - y0;
|
||||
InsertSlice(c0, c, 0 , 0);
|
||||
|
||||
/////////////////////////////
|
||||
// Reconstruct the bulk solution Pdag PV^-1 Dm P
|
||||
/////////////////////////////
|
||||
Mato.P(c,B);
|
||||
Mato.M(B,A);
|
||||
PauliVillarsSolvero(Mato,A,B);
|
||||
Mato.Pdag(B,A);
|
||||
|
||||
//////////////////////////////
|
||||
// Reinsert surface prop
|
||||
//////////////////////////////
|
||||
InsertSlice(y0,A,0,0);
|
||||
|
||||
//////////////////////////////
|
||||
// Convert from y back to x
|
||||
//////////////////////////////
|
||||
Mato.P(A,B);
|
||||
|
||||
// sol5' = sol5 + M^-1 defect
|
||||
// = sol5 + M^-1 src - M^-1 M sol5 ...
|
||||
sol5 = sol5 + B;
|
||||
std::cout << GridLogMessage << "***************************************" <<std::endl;
|
||||
std::cout << GridLogMessage << " Sol5 update "<<std::endl;
|
||||
std::cout << GridLogMessage << "***************************************" <<std::endl;
|
||||
std::cout << GridLogMessage << " Sol5 now "<<norm2(sol5)<<std::endl;
|
||||
std::cout << GridLogMessage << " delta "<<norm2(B)<<std::endl;
|
||||
|
||||
// New defect = b - M sol5
|
||||
Mato.M(sol5,A);
|
||||
defect = b - A;
|
||||
|
||||
std::cout << GridLogMessage << " defect "<<norm2(defect)<<std::endl;
|
||||
|
||||
double resid = ::sqrt(norm2(defect) / norm2(b));
|
||||
std::cout << GridLogMessage << "Residual " << i << ": " << resid << std::endl;
|
||||
std::cout << GridLogMessage << "***************************************" <<std::endl;
|
||||
|
||||
if (resid < target_resid) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "MADWF : Exceeded maxiter "<<std::endl;
|
||||
assert(0);
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}}
|
@ -1,502 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
MobiusEOFAFermion<Impl>::MobiusEOFAFermion(
|
||||
GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3,
|
||||
RealD _shift, int _pm, RealD _M5,
|
||||
RealD _b, RealD _c, const ImplParams &p) :
|
||||
AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
|
||||
FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
|
||||
_shift, _pm, _M5, _b, _c, p)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
RealD eps = 1.0;
|
||||
Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls);
|
||||
assert(zdata->n == this->Ls);
|
||||
|
||||
std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b <<
|
||||
",c=" << _c << ") with Ls=" << Ls << std::endl;
|
||||
this->SetCoefficientsTanh(zdata, _b, _c);
|
||||
std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 <<
|
||||
",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift <<
|
||||
",pm=" << _pm << ")" << std::endl;
|
||||
|
||||
Approx::zolotarev_free(zdata);
|
||||
|
||||
if(_shift != 0.0){
|
||||
SetCoefficientsPrecondShiftOps();
|
||||
} else {
|
||||
Mooee_shift.resize(Ls, 0.0);
|
||||
MooeeInv_shift_lc.resize(Ls, 0.0);
|
||||
MooeeInv_shift_norm.resize(Ls, 0.0);
|
||||
MooeeInvDag_shift_lc.resize(Ls, 0.0);
|
||||
MooeeInvDag_shift_norm.resize(Ls, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************
|
||||
* Additional EOFA operators only called outside the inverter.
|
||||
* Since speed is not essential, simple axpby-style
|
||||
* implementations should be fine.
|
||||
***************************************************************/
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
RealD alpha = this->alpha;
|
||||
|
||||
Din = zero;
|
||||
if((sign == 1) && (dag == 0)) { // \Omega_{+}
|
||||
for(int s=0; s<Ls; ++s){
|
||||
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);
|
||||
}
|
||||
} else if((sign == -1) && (dag == 0)) { // \Omega_{-}
|
||||
for(int s=0; s<Ls; ++s){
|
||||
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,s)/std::pow(1.0+alpha,s+1), psi, s, 0);
|
||||
}
|
||||
} else if((sign == 1 ) && (dag == 1)) { // \Omega_{+}^{\dagger}
|
||||
for(int sp=0; sp<Ls; ++sp){
|
||||
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,Ls-sp-1)/std::pow(1.0+alpha,Ls-sp), psi, 0, sp);
|
||||
}
|
||||
} else if((sign == -1) && (dag == 1)) { // \Omega_{-}^{\dagger}
|
||||
for(int sp=0; sp<Ls; ++sp){
|
||||
axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,sp)/std::pow(1.0+alpha,sp+1), psi, 0, sp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is the operator relating the usual Ddwf to TWQCD's EOFA Dirac operator (arXiv:1706.05843, Eqn. 6).
|
||||
// It also relates the preconditioned and unpreconditioned systems described in Appendix B.2.
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
RealD b = 0.5 * ( 1.0 + this->alpha );
|
||||
RealD c = 0.5 * ( 1.0 - this->alpha );
|
||||
RealD mq1 = this->mq1;
|
||||
|
||||
for(int s=0; s<Ls; ++s){
|
||||
if(s == 0) {
|
||||
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, 1.0, chi, mq1*c, psi, s, Ls-1);
|
||||
} else if(s == (Ls-1)) {
|
||||
axpby_ssp_pminus(chi, b, psi, mq1*c, psi, s, 0);
|
||||
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
RealD m = this->mq1;
|
||||
RealD c = 0.5 * this->alpha;
|
||||
RealD d = 0.5;
|
||||
|
||||
RealD DtInv_p(0.0), DtInv_m(0.0);
|
||||
RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
|
||||
FermionField tmp(this->FermionGrid());
|
||||
|
||||
for(int s=0; s<Ls; ++s){
|
||||
for(int sp=0; sp<Ls; ++sp){
|
||||
|
||||
DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
|
||||
DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
|
||||
DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
|
||||
DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
|
||||
|
||||
if(sp == 0){
|
||||
axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
|
||||
axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
|
||||
} else {
|
||||
axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
|
||||
axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
|
||||
}
|
||||
|
||||
}}
|
||||
}
|
||||
|
||||
/*****************************************************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField Din(psi._grid);
|
||||
|
||||
this->Meooe5D(psi, Din);
|
||||
this->DW(Din, chi, DaggerNo);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
this->M5D(psi, chi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField Din(psi._grid);
|
||||
|
||||
this->DW(psi, Din, DaggerYes);
|
||||
this->MeooeDag5D(Din, chi);
|
||||
this->M5Ddag(psi, chi);
|
||||
axpby(chi, 1.0, 1.0, chi, psi);
|
||||
return(norm2(chi));
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
* Performance critical fermion operators called inside the inverter
|
||||
********************************************************************/
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<Coeff_t> diag(Ls,1.0);
|
||||
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
|
||||
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
|
||||
|
||||
// no shift term
|
||||
if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
|
||||
|
||||
// fused M + shift operation
|
||||
else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
std::vector<Coeff_t> diag(Ls,1.0);
|
||||
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = this->mq1;
|
||||
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] = this->mq1;
|
||||
|
||||
// no shift term
|
||||
if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
|
||||
|
||||
// fused M + shift operation
|
||||
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
|
||||
}
|
||||
|
||||
// half checkerboard operations
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
// coefficients of Mooee
|
||||
std::vector<Coeff_t> diag = this->bee;
|
||||
std::vector<Coeff_t> upper(Ls);
|
||||
std::vector<Coeff_t> lower(Ls);
|
||||
for(int s=0; s<Ls; s++){
|
||||
upper[s] = -this->cee[s];
|
||||
lower[s] = -this->cee[s];
|
||||
}
|
||||
upper[Ls-1] *= -this->mq1;
|
||||
lower[0] *= -this->mq1;
|
||||
|
||||
// no shift term
|
||||
if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
|
||||
|
||||
// fused M + shift operation
|
||||
else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
// coefficients of MooeeDag
|
||||
std::vector<Coeff_t> diag = this->bee;
|
||||
std::vector<Coeff_t> upper(Ls);
|
||||
std::vector<Coeff_t> lower(Ls);
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
upper[s] = -this->cee[s+1];
|
||||
lower[s] = this->mq1*this->cee[Ls-1];
|
||||
} else if(s==(Ls-1)) {
|
||||
upper[s] = this->mq1*this->cee[0];
|
||||
lower[s] = -this->cee[s-1];
|
||||
} else {
|
||||
upper[s] = -this->cee[s+1];
|
||||
lower[s] = -this->cee[s-1];
|
||||
}
|
||||
}
|
||||
|
||||
// no shift term
|
||||
if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
|
||||
|
||||
// fused M + shift operation
|
||||
else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
|
||||
}
|
||||
|
||||
/****************************************************************************************/
|
||||
|
||||
// Computes coefficients for applying Cayley preconditioned shift operators
|
||||
// (Mooee + \Delta) --> Mooee_shift
|
||||
// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
|
||||
// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
|
||||
// For the latter two cases, the operation takes the form
|
||||
// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
|
||||
// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int pm = this->pm;
|
||||
RealD alpha = this->alpha;
|
||||
RealD k = this->k;
|
||||
RealD mq1 = this->mq1;
|
||||
RealD shift = this->shift;
|
||||
|
||||
// Initialize
|
||||
Mooee_shift.resize(Ls);
|
||||
MooeeInv_shift_lc.resize(Ls);
|
||||
MooeeInv_shift_norm.resize(Ls);
|
||||
MooeeInvDag_shift_lc.resize(Ls);
|
||||
MooeeInvDag_shift_norm.resize(Ls);
|
||||
|
||||
// Construct Mooee_shift
|
||||
int idx(0);
|
||||
Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
|
||||
( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
|
||||
for(int s=0; s<Ls; ++s){
|
||||
idx = (pm == 1) ? (s) : (Ls-1-s);
|
||||
Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
|
||||
}
|
||||
|
||||
// Tridiagonal solve for MooeeInvDag_shift_lc
|
||||
{
|
||||
Coeff_t m(0.0);
|
||||
std::vector<Coeff_t> d = Mooee_shift;
|
||||
std::vector<Coeff_t> u(Ls,0.0);
|
||||
std::vector<Coeff_t> y(Ls,0.0);
|
||||
std::vector<Coeff_t> q(Ls,0.0);
|
||||
if(pm == 1){ u[0] = 1.0; }
|
||||
else{ u[Ls-1] = 1.0; }
|
||||
|
||||
// Tridiagonal matrix algorithm + Sherman-Morrison formula
|
||||
//
|
||||
// We solve
|
||||
// ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
|
||||
// where Mooee' is the tridiagonal part of Mooee_{+}, and
|
||||
// u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
|
||||
// so that the outer-product u \otimes v gives the (0,Ls-1)
|
||||
// entry of Mooee_{+}.
|
||||
//
|
||||
// We do this as two solves: Mooee'*y = d and Mooee'*q = u,
|
||||
// and then construct the solution to the original system
|
||||
// MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
|
||||
if(pm == 1){
|
||||
for(int s=1; s<Ls; ++s){
|
||||
m = -this->cee[s] / this->bee[s-1];
|
||||
d[s] -= m*d[s-1];
|
||||
u[s] -= m*u[s-1];
|
||||
}
|
||||
}
|
||||
y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
|
||||
q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
|
||||
for(int s=Ls-2; s>=0; --s){
|
||||
if(pm == 1){
|
||||
y[s] = d[s] / this->bee[s];
|
||||
q[s] = u[s] / this->bee[s];
|
||||
} else {
|
||||
y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
|
||||
q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
|
||||
}
|
||||
}
|
||||
|
||||
// Construct MooeeInvDag_shift_lc
|
||||
for(int s=0; s<Ls; ++s){
|
||||
if(pm == 1){
|
||||
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
|
||||
(1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
|
||||
} else {
|
||||
MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
|
||||
(1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
|
||||
}
|
||||
}
|
||||
|
||||
// Compute remaining coefficients
|
||||
N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
|
||||
for(int s=0; s<Ls; ++s){
|
||||
|
||||
// MooeeInv_shift_lc
|
||||
if(pm == 1){ MooeeInv_shift_lc[s] = std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s); }
|
||||
else{ MooeeInv_shift_lc[s] = std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s); }
|
||||
|
||||
// MooeeInv_shift_norm
|
||||
MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
|
||||
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N;
|
||||
|
||||
// MooeeInvDag_shift_norm
|
||||
if(pm == 1){ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s) /
|
||||
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
|
||||
else{ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s) /
|
||||
( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recompute coefficients for a different value of shift constant
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
|
||||
{
|
||||
this->shift = new_shift;
|
||||
if(new_shift != 0.0){
|
||||
SetCoefficientsPrecondShiftOps();
|
||||
} else {
|
||||
int Ls = this->Ls;
|
||||
Mooee_shift.resize(Ls,0.0);
|
||||
MooeeInv_shift_lc.resize(Ls,0.0);
|
||||
MooeeInv_shift_norm.resize(Ls,0.0);
|
||||
MooeeInvDag_shift_lc.resize(Ls,0.0);
|
||||
MooeeInvDag_shift_norm.resize(Ls,0.0);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
|
||||
Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
|
||||
GridBase* grid = this->FermionRedBlackGrid();
|
||||
int LLs = grid->_rdimensions[0];
|
||||
|
||||
if(LLs == Ls){ return; } // Not vectorised in 5th direction
|
||||
|
||||
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||
|
||||
for(int s=0; s<Ls; s++){
|
||||
Pplus(s,s) = this->bee[s];
|
||||
Pminus(s,s) = this->bee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pminus(s,s+1) = -this->cee[s];
|
||||
Pplus(s+1,s) = -this->cee[s+1];
|
||||
}
|
||||
|
||||
Pplus (0,Ls-1) = this->mq1*this->cee[0];
|
||||
Pminus(Ls-1,0) = this->mq1*this->cee[Ls-1];
|
||||
|
||||
if(this->shift != 0.0){
|
||||
RealD c = 0.5 * this->alpha;
|
||||
RealD d = 0.5;
|
||||
RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) );
|
||||
if(this->pm == 1) {
|
||||
for(int s=0; s<Ls; ++s){
|
||||
Pplus(s,Ls-1) += N * std::pow(-1.0,s) * std::pow(c-d,s) / std::pow(c+d,Ls+s+1);
|
||||
}
|
||||
} else {
|
||||
for(int s=0; s<Ls; ++s){
|
||||
Pminus(s,0) += N * std::pow(-1.0,s+1) * std::pow(c-d,Ls-1-s) / std::pow(c+d,2*Ls-s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Eigen::MatrixXcd PplusMat ;
|
||||
Eigen::MatrixXcd PminusMat;
|
||||
|
||||
if(inv) {
|
||||
PplusMat = Pplus.inverse();
|
||||
PminusMat = Pminus.inverse();
|
||||
} else {
|
||||
PplusMat = Pplus;
|
||||
PminusMat = Pminus;
|
||||
}
|
||||
|
||||
if(dag){
|
||||
PplusMat.adjointInPlace();
|
||||
PminusMat.adjointInPlace();
|
||||
}
|
||||
|
||||
typedef typename SiteHalfSpinor::scalar_type scalar_type;
|
||||
const int Nsimd = Simd::Nsimd();
|
||||
Matp.resize(Ls*LLs);
|
||||
Matm.resize(Ls*LLs);
|
||||
|
||||
for(int s2=0; s2<Ls; s2++){
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
int istride = LLs;
|
||||
int ostride = 1;
|
||||
Simd Vp;
|
||||
Simd Vm;
|
||||
scalar_type *sp = (scalar_type*) &Vp;
|
||||
scalar_type *sm = (scalar_type*) &Vm;
|
||||
for(int l=0; l<Nsimd; l++){
|
||||
if(switcheroo<Coeff_t>::iscomplex()) {
|
||||
sp[l] = PplusMat (l*istride+s1*ostride,s2);
|
||||
sm[l] = PminusMat(l*istride+s1*ostride,s2);
|
||||
} else {
|
||||
// if real
|
||||
scalar_type tmp;
|
||||
tmp = PplusMat (l*istride+s1*ostride,s2);
|
||||
sp[l] = scalar_type(tmp.real(),tmp.real());
|
||||
tmp = PminusMat(l*istride+s1*ostride,s2);
|
||||
sm[l] = scalar_type(tmp.real(),tmp.real());
|
||||
}
|
||||
}
|
||||
Matp[LLs*s2+s1] = Vp;
|
||||
Matm[LLs*s2+s1] = Vm;
|
||||
}}
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(MobiusEOFAFermion);
|
||||
GparityFermOpTemplateInstantiate(MobiusEOFAFermion);
|
||||
|
||||
}}
|
@ -1,133 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_MOBIUS_EOFA_FERMION_H
|
||||
#define GRID_QCD_MOBIUS_EOFA_FERMION_H
|
||||
|
||||
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
class MobiusEOFAFermion : public AbstractEOFAFermion<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
public:
|
||||
// Shift operator coefficients for red-black preconditioned Mobius EOFA
|
||||
std::vector<Coeff_t> Mooee_shift;
|
||||
std::vector<Coeff_t> MooeeInv_shift_lc;
|
||||
std::vector<Coeff_t> MooeeInv_shift_norm;
|
||||
std::vector<Coeff_t> MooeeInvDag_shift_lc;
|
||||
std::vector<Coeff_t> MooeeInvDag_shift_norm;
|
||||
|
||||
virtual void Instantiatable(void) {};
|
||||
|
||||
// EOFA-specific operations
|
||||
virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag);
|
||||
virtual void Dtilde (const FermionField& in, FermionField& out);
|
||||
virtual void DtildeInv (const FermionField& in, FermionField& out);
|
||||
|
||||
// override multiply
|
||||
virtual RealD M (const FermionField& in, FermionField& out);
|
||||
virtual RealD Mdag (const FermionField& in, FermionField& out);
|
||||
|
||||
// half checkerboard operations
|
||||
virtual void Mooee (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeDag (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInv (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInv_shift (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInvDag (const FermionField& in, FermionField& out);
|
||||
virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out);
|
||||
|
||||
virtual void M5D (const FermionField& psi, FermionField& chi);
|
||||
virtual void M5Ddag (const FermionField& psi, FermionField& chi);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Instantiate different versions depending on Impl
|
||||
/////////////////////////////////////////////////////
|
||||
void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
|
||||
|
||||
void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs);
|
||||
|
||||
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
|
||||
|
||||
void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs);
|
||||
|
||||
void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
|
||||
|
||||
void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
|
||||
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
|
||||
Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
|
||||
|
||||
virtual void RefreshShiftCoefficients(RealD new_shift);
|
||||
|
||||
// Constructors
|
||||
MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
|
||||
GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
|
||||
RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
|
||||
RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams());
|
||||
|
||||
protected:
|
||||
void SetCoefficientsPrecondShiftOps(void);
|
||||
};
|
||||
}}
|
||||
|
||||
#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\
|
||||
template void MobiusEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
|
||||
template void MobiusEOFAFermion<A>::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
|
||||
template void MobiusEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
|
||||
template void MobiusEOFAFermion<A>::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
|
||||
template void MobiusEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
|
||||
template void MobiusEOFAFermion<A>::MooeeInv_shift(const FermionField& psi, FermionField& chi); \
|
||||
template void MobiusEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi); \
|
||||
template void MobiusEOFAFermion<A>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi);
|
||||
|
||||
#undef MOBIUS_EOFA_DPERP_DENSE
|
||||
#define MOBIUS_EOFA_DPERP_CACHE
|
||||
#undef MOBIUS_EOFA_DPERP_LINALG
|
||||
#define MOBIUS_EOFA_DPERP_VEC
|
||||
|
||||
#endif
|
@ -1,429 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
GridBase *grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
for(int s=0; s<Ls; s++){
|
||||
auto tmp = psi._odata[0];
|
||||
if(s==0){
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5m(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
||||
std::vector<Coeff_t> &shift_coeffs)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||
GridBase *grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
for(int s=0; s<Ls; s++){
|
||||
auto tmp = psi._odata[0];
|
||||
if(s==0){
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5m(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5m(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5p(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
|
||||
else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
|
||||
chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
GridBase *grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
auto tmp = psi._odata[0];
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5p(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
||||
std::vector<Coeff_t> &shift_coeffs)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||
GridBase *grid = psi._grid;
|
||||
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
chi[ss+Ls-1] = zero;
|
||||
auto tmp = psi._odata[0];
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+Ls-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else if(s==(Ls-1)) {
|
||||
spProj5p(tmp, psi._odata[ss+0]);
|
||||
chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
} else {
|
||||
spProj5p(tmp, psi._odata[ss+s+1]);
|
||||
chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
|
||||
spProj5m(tmp, psi._odata[ss+s-1]);
|
||||
chi[ss+s] = chi[ss+s] + lower[s]*tmp;
|
||||
}
|
||||
if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
|
||||
else{ spProj5m(tmp, psi._odata[ss+s]); }
|
||||
chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
|
||||
|
||||
GridBase *grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
|
||||
auto tmp = psi._odata[0];
|
||||
|
||||
// Apply (L^{\prime})^{-1}
|
||||
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5p(tmp, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
spProj5m(tmp, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
|
||||
spProj5p(tmp, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
|
||||
}
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
|
||||
|
||||
// Apply U^{-1}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
spProj5m(tmp, chi[ss+s+1]);
|
||||
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
GridBase *grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
|
||||
auto tmp1 = psi._odata[0];
|
||||
auto tmp2 = psi._odata[0];
|
||||
auto tmp2_spProj = psi._odata[0];
|
||||
|
||||
// Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
|
||||
chi[ss] = psi[ss]; // chi[0]=psi[0]
|
||||
tmp2 = MooeeInv_shift_lc[0]*psi[ss];
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5p(tmp1, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
|
||||
tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
|
||||
}
|
||||
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
|
||||
else{ spProj5m(tmp2_spProj, tmp2); }
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
spProj5m(tmp1, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
|
||||
spProj5p(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
|
||||
}
|
||||
// chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
|
||||
spProj5m(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
|
||||
|
||||
// Apply U^{-1} and add shift term
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
|
||||
spProj5m(tmp1, chi[ss+s]);
|
||||
chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
|
||||
|
||||
GridBase *grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
|
||||
auto tmp = psi._odata[0];
|
||||
|
||||
// Apply (U^{\prime})^{-dag}
|
||||
chi[ss] = psi[ss];
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5m(tmp, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
|
||||
}
|
||||
|
||||
// U_m^{-\dag}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5p(tmp, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
|
||||
}
|
||||
|
||||
// L_m^{-\dag} D^{-dag}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5m(tmp, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
|
||||
}
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
|
||||
|
||||
// Apply L^{-dag}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
spProj5p(tmp, chi[ss+s+1]);
|
||||
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
GridBase *grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
|
||||
|
||||
auto tmp1 = psi._odata[0];
|
||||
auto tmp2 = psi._odata[0];
|
||||
auto tmp2_spProj = psi._odata[0];
|
||||
|
||||
// Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
|
||||
chi[ss] = psi[ss];
|
||||
tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
|
||||
for(int s=1; s<Ls; s++){
|
||||
spProj5m(tmp1, chi[ss+s-1]);
|
||||
chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
|
||||
tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
|
||||
}
|
||||
if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
|
||||
else{ spProj5m(tmp2_spProj, tmp2); }
|
||||
|
||||
// U_m^{-\dag}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5p(tmp1, chi[ss+s]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
|
||||
}
|
||||
|
||||
// L_m^{-\dag} D^{-dag}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
spProj5m(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
|
||||
}
|
||||
chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
|
||||
spProj5p(tmp1, chi[ss+Ls-1]);
|
||||
chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
|
||||
|
||||
// Apply L^{-dag}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
|
||||
spProj5p(tmp1, chi[ss+s]);
|
||||
chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
#ifdef MOBIUS_EOFA_DPERP_CACHE
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,184 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/*
|
||||
* Dense matrix versions of routines
|
||||
*/
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi._grid->_rdimensions[0];
|
||||
int vol = psi._grid->oSites()/LLs;
|
||||
|
||||
int pm = this->pm;
|
||||
RealD shift = this->shift;
|
||||
RealD alpha = this->alpha;
|
||||
RealD k = this->k;
|
||||
RealD mq1 = this->mq1;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
assert(Ls==LLs);
|
||||
|
||||
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||
|
||||
for(int s=0;s<Ls;s++){
|
||||
Pplus(s,s) = this->bee[s];
|
||||
Pminus(s,s) = this->bee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pminus(s,s+1) = -this->cee[s];
|
||||
}
|
||||
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
Pplus(s+1,s) = -this->cee[s+1];
|
||||
}
|
||||
Pplus (0,Ls-1) = mq1*this->cee[0];
|
||||
Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
|
||||
|
||||
if(shift != 0.0){
|
||||
Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
|
||||
for(int s=0; s<Ls; ++s){
|
||||
if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
|
||||
else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
|
||||
}
|
||||
}
|
||||
|
||||
Eigen::MatrixXd PplusMat ;
|
||||
Eigen::MatrixXd PminusMat;
|
||||
|
||||
if(inv){
|
||||
PplusMat = Pplus.inverse();
|
||||
PminusMat = Pminus.inverse();
|
||||
} else {
|
||||
PplusMat = Pplus;
|
||||
PminusMat = Pminus;
|
||||
}
|
||||
|
||||
if(dag){
|
||||
PplusMat.adjointInPlace();
|
||||
PminusMat.adjointInPlace();
|
||||
}
|
||||
|
||||
// For the non-vectorised s-direction this is simple
|
||||
|
||||
for(auto site=0; site<vol; site++){
|
||||
|
||||
SiteSpinor SiteChi;
|
||||
SiteHalfSpinor SitePplus;
|
||||
SiteHalfSpinor SitePminus;
|
||||
|
||||
for(int s1=0; s1<Ls; s1++){
|
||||
SiteChi = zero;
|
||||
for(int s2=0; s2<Ls; s2++){
|
||||
int lex2 = s2 + Ls*site;
|
||||
if(PplusMat(s1,s2) != 0.0){
|
||||
spProj5p(SitePplus,psi[lex2]);
|
||||
accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
|
||||
}
|
||||
if(PminusMat(s1,s2) != 0.0){
|
||||
spProj5m(SitePminus, psi[lex2]);
|
||||
accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
|
||||
}
|
||||
}
|
||||
chi[s1+Ls*site] = SiteChi*0.5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef MOBIUS_EOFA_DPERP_DENSE
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
|
||||
|
||||
template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
|
||||
|
||||
template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,290 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
// Pminus fowards
|
||||
// Pplus backwards
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(s==0) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
|
||||
} else if (s==(Ls-1)) {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
} else {
|
||||
axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
|
||||
axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
|
||||
}
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
|
||||
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
// Apply (L^{\prime})^{-1}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply U^{-1}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField tmp(psi._grid);
|
||||
|
||||
// Apply (L^{\prime})^{-1}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
||||
axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
|
||||
}
|
||||
|
||||
// L_m^{-1}
|
||||
for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||
axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// U_m^{-1} D^{-1}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply U^{-1} and add shift term
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls]
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
|
||||
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
// Apply (U^{\prime})^{-dagger}
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
|
||||
}
|
||||
|
||||
// U_m^{-\dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// L_m^{-\dagger} D^{-dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply L^{-dagger}
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
Coeff_t one(1.0);
|
||||
Coeff_t czero(0.0);
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
int Ls = this->Ls;
|
||||
|
||||
FermionField tmp(psi._grid);
|
||||
|
||||
// Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
|
||||
axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0]
|
||||
axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
|
||||
for(int s=1; s<Ls; s++){
|
||||
axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
|
||||
axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
|
||||
}
|
||||
|
||||
// U_m^{-\dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
|
||||
}
|
||||
|
||||
// L_m^{-\dagger} D^{-dagger}
|
||||
for(int s=0; s<Ls-1; s++){
|
||||
axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
|
||||
}
|
||||
axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
|
||||
|
||||
// Apply L^{-dagger} and add shift
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
|
||||
for(int s=Ls-2; s>=0; s--){
|
||||
axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls]
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef MOBIUS_EOFA_DPERP_LINALG
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,983 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/*
|
||||
* Dense matrix versions of routines
|
||||
*/
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
|
||||
{
|
||||
this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd>> u(LLs);
|
||||
Vector<iSinglet<Simd>> l(LLs);
|
||||
Vector<iSinglet<Simd>> d(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
assert(Nc == 3);
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
#if 0
|
||||
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
alignas(64) SiteHalfSpinor hm;
|
||||
alignas(64) SiteSpinor fp;
|
||||
alignas(64) SiteSpinor fm;
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
int vp = (v+1)%LLs;
|
||||
int vm = (v+LLs-1)%LLs;
|
||||
|
||||
spProj5m(hp, psi[ss+vp]);
|
||||
spProj5p(hm, psi[ss+vm]);
|
||||
|
||||
if (vp <= v){ rotate(hp, hp, 1); }
|
||||
if (vm >= v){ rotate(hm, hm, nsimd-1); }
|
||||
|
||||
hp = 0.5*hp;
|
||||
hm = 0.5*hm;
|
||||
|
||||
spRecon5m(fp, hp);
|
||||
spRecon5p(fm, hm);
|
||||
|
||||
chi[ss+v] = d[v]*phi[ss+v];
|
||||
chi[ss+v] = chi[ss+v] + u[v]*fp;
|
||||
chi[ss+v] = chi[ss+v] + l[v]*fm;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(2)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(2)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(2)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(3)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(3)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(3)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(0)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(0)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(0)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(1)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(1)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(1)(2);
|
||||
|
||||
if(vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
// Can force these to real arithmetic and save 2x.
|
||||
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
|
||||
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
|
||||
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
|
||||
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
|
||||
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
|
||||
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
|
||||
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
#if 0
|
||||
|
||||
this->M5D(psi, phi, chi, lower, diag, upper);
|
||||
|
||||
// FIXME: possible gain from vectorizing shift operation as well?
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd>> u(LLs);
|
||||
Vector<iSinglet<Simd>> l(LLs);
|
||||
Vector<iSinglet<Simd>> d(LLs);
|
||||
Vector<iSinglet<Simd>> s(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
scalar_type* s_p = (scalar_type*) &s[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
s_p[ss] = shift_coeffs[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
assert(Nc == 3);
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
int vs = (this->pm == 1) ? LLs-1 : 0;
|
||||
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(2)(0) : psi[ss+vs]()(0)(0);
|
||||
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(2)(1) : psi[ss+vs]()(0)(1);
|
||||
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(2)(2) : psi[ss+vs]()(0)(2);
|
||||
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(3)(0) : psi[ss+vs]()(1)(0);
|
||||
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(3)(1) : psi[ss+vs]()(1)(1);
|
||||
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(3)(2) : psi[ss+vs]()(1)(2);
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(2)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(2)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(2)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(3)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(3)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(3)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(0)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(0)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(0)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(1)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(1)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(1)(2);
|
||||
|
||||
if(vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(this->pm == 1 && vs <= v){
|
||||
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
|
||||
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
|
||||
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
|
||||
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
|
||||
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
|
||||
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
if(this->pm == -1 && vs >= v){
|
||||
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
|
||||
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
|
||||
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
|
||||
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
|
||||
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
|
||||
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
|
||||
}
|
||||
|
||||
// Can force these to real arithmetic and save 2x.
|
||||
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
|
||||
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
|
||||
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
|
||||
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
|
||||
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
|
||||
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
|
||||
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
}
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd>> u(LLs);
|
||||
Vector<iSinglet<Simd>> l(LLs);
|
||||
Vector<iSinglet<Simd>> d(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
#if 0
|
||||
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
alignas(64) SiteHalfSpinor hm;
|
||||
alignas(64) SiteSpinor fp;
|
||||
alignas(64) SiteSpinor fm;
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
int vp = (v+1)%LLs;
|
||||
int vm = (v+LLs-1)%LLs;
|
||||
|
||||
spProj5p(hp, psi[ss+vp]);
|
||||
spProj5m(hm, psi[ss+vm]);
|
||||
|
||||
if(vp <= v){ rotate(hp, hp, 1); }
|
||||
if(vm >= v){ rotate(hm, hm, nsimd-1); }
|
||||
|
||||
hp = hp*0.5;
|
||||
hm = hm*0.5;
|
||||
spRecon5p(fp, hp);
|
||||
spRecon5m(fm, hm);
|
||||
|
||||
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
||||
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0 ) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(0)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(0)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(0)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(1)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(1)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(1)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(2)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(2)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(2)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(3)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(3)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(3)(2);
|
||||
|
||||
if (vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
|
||||
Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
|
||||
Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
|
||||
Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
|
||||
Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
|
||||
Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
#if 0
|
||||
|
||||
this->M5Ddag(psi, phi, chi, lower, diag, upper);
|
||||
|
||||
// FIXME: possible gain from vectorizing shift operation as well?
|
||||
Coeff_t one(1.0);
|
||||
int Ls = this->Ls;
|
||||
for(int s=0; s<Ls; s++){
|
||||
if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
|
||||
else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
GridBase* grid = psi._grid;
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
Vector<iSinglet<Simd>> u(LLs);
|
||||
Vector<iSinglet<Simd>> l(LLs);
|
||||
Vector<iSinglet<Simd>> d(LLs);
|
||||
Vector<iSinglet<Simd>> s(LLs);
|
||||
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.checkerboard == psi.checkerboard);
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
scalar_type* l_p = (scalar_type*) &l[0];
|
||||
scalar_type* d_p = (scalar_type*) &d[0];
|
||||
scalar_type* s_p = (scalar_type*) &s[0];
|
||||
|
||||
for(int o=0; o<LLs; o++){ // outer
|
||||
for(int i=0; i<nsimd; i++){ //inner
|
||||
int s = o + i*LLs;
|
||||
int ss = o*nsimd + i;
|
||||
u_p[ss] = upper[s];
|
||||
l_p[ss] = lower[s];
|
||||
d_p[ss] = diag[s];
|
||||
s_p[ss] = shift_coeffs[s];
|
||||
}}
|
||||
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
|
||||
|
||||
int vs = (this->pm == 1) ? LLs-1 : 0;
|
||||
Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(0)(0) : psi[ss+vs]()(2)(0);
|
||||
Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(0)(1) : psi[ss+vs]()(2)(1);
|
||||
Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(0)(2) : psi[ss+vs]()(2)(2);
|
||||
Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(1)(0) : psi[ss+vs]()(3)(0);
|
||||
Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(1)(1) : psi[ss+vs]()(3)(1);
|
||||
Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(1)(2) : psi[ss+vs]()(3)(2);
|
||||
|
||||
for(int v=0; v<LLs; v++){
|
||||
|
||||
vprefetch(psi[ss+v+LLs]);
|
||||
|
||||
int vp = (v == LLs-1) ? 0 : v+1;
|
||||
int vm = (v == 0 ) ? LLs-1 : v-1;
|
||||
|
||||
Simd hp_00 = psi[ss+vp]()(0)(0);
|
||||
Simd hp_01 = psi[ss+vp]()(0)(1);
|
||||
Simd hp_02 = psi[ss+vp]()(0)(2);
|
||||
Simd hp_10 = psi[ss+vp]()(1)(0);
|
||||
Simd hp_11 = psi[ss+vp]()(1)(1);
|
||||
Simd hp_12 = psi[ss+vp]()(1)(2);
|
||||
|
||||
Simd hm_00 = psi[ss+vm]()(2)(0);
|
||||
Simd hm_01 = psi[ss+vm]()(2)(1);
|
||||
Simd hm_02 = psi[ss+vm]()(2)(2);
|
||||
Simd hm_10 = psi[ss+vm]()(3)(0);
|
||||
Simd hm_11 = psi[ss+vm]()(3)(1);
|
||||
Simd hm_12 = psi[ss+vm]()(3)(2);
|
||||
|
||||
if (vp <= v){
|
||||
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||
}
|
||||
|
||||
if(this->pm == 1 && vs <= v){
|
||||
hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
|
||||
hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
|
||||
hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
|
||||
hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
|
||||
hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
|
||||
hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
|
||||
}
|
||||
|
||||
if(vm >= v){
|
||||
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||
}
|
||||
|
||||
if(this->pm == -1 && vs >= v){
|
||||
hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
|
||||
hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
|
||||
hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
|
||||
hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
|
||||
hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
|
||||
hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
|
||||
}
|
||||
|
||||
Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
|
||||
Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
|
||||
Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
|
||||
Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
|
||||
Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
|
||||
Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
|
||||
Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
|
||||
Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
|
||||
Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
|
||||
Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
|
||||
Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
|
||||
Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
|
||||
: switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
|
||||
+ switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
|
||||
|
||||
vstream(chi[ss+v]()(0)(0), p_00);
|
||||
vstream(chi[ss+v]()(0)(1), p_01);
|
||||
vstream(chi[ss+v]()(0)(2), p_02);
|
||||
vstream(chi[ss+v]()(1)(0), p_10);
|
||||
vstream(chi[ss+v]()(1)(1), p_11);
|
||||
vstream(chi[ss+v]()(1)(2), p_12);
|
||||
vstream(chi[ss+v]()(2)(0), p_20);
|
||||
vstream(chi[ss+v]()(2)(1), p_21);
|
||||
vstream(chi[ss+v]()(2)(2), p_22);
|
||||
vstream(chi[ss+v]()(3)(0), p_30);
|
||||
vstream(chi[ss+v]()(3)(1), p_31);
|
||||
vstream(chi[ss+v]()(3)(2), p_32);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef AVX512
|
||||
#include<simd/Intel512common.h>
|
||||
#include<simd/Intel512avx.h>
|
||||
#include<simd/Intel512single.h>
|
||||
#endif
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
#ifndef AVX512
|
||||
{
|
||||
SiteHalfSpinor BcastP;
|
||||
SiteHalfSpinor BcastM;
|
||||
SiteHalfSpinor SiteChiP;
|
||||
SiteHalfSpinor SiteChiM;
|
||||
|
||||
// Ls*Ls * 2 * 12 * vol flops
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
|
||||
for(int s2=0; s2<LLs; s2++){
|
||||
for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
|
||||
|
||||
int s = s2 + l*LLs;
|
||||
int lex = s2 + LLs*site;
|
||||
|
||||
if( s2==0 && l==0 ){
|
||||
SiteChiP=zero;
|
||||
SiteChiM=zero;
|
||||
}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
|
||||
}}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
|
||||
}}
|
||||
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
|
||||
SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
|
||||
}}
|
||||
}}
|
||||
|
||||
{
|
||||
int lex = s1 + LLs*site;
|
||||
for(int sp=0; sp<2; sp++){
|
||||
for(int co=0; co<Nc; co++){
|
||||
vstream(chi[lex]()(sp)(co), SiteChiP()(sp)(co));
|
||||
vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
|
||||
}}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
{
|
||||
// pointers
|
||||
// MASK_REGS;
|
||||
#define Chi_00 %%zmm1
|
||||
#define Chi_01 %%zmm2
|
||||
#define Chi_02 %%zmm3
|
||||
#define Chi_10 %%zmm4
|
||||
#define Chi_11 %%zmm5
|
||||
#define Chi_12 %%zmm6
|
||||
#define Chi_20 %%zmm7
|
||||
#define Chi_21 %%zmm8
|
||||
#define Chi_22 %%zmm9
|
||||
#define Chi_30 %%zmm10
|
||||
#define Chi_31 %%zmm11
|
||||
#define Chi_32 %%zmm12
|
||||
|
||||
#define BCAST0 %%zmm13
|
||||
#define BCAST1 %%zmm14
|
||||
#define BCAST2 %%zmm15
|
||||
#define BCAST3 %%zmm16
|
||||
#define BCAST4 %%zmm17
|
||||
#define BCAST5 %%zmm18
|
||||
#define BCAST6 %%zmm19
|
||||
#define BCAST7 %%zmm20
|
||||
#define BCAST8 %%zmm21
|
||||
#define BCAST9 %%zmm22
|
||||
#define BCAST10 %%zmm23
|
||||
#define BCAST11 %%zmm24
|
||||
|
||||
int incr = LLs*LLs*sizeof(iSinglet<Simd>);
|
||||
|
||||
for(int s1=0; s1<LLs; s1++){
|
||||
|
||||
for(int s2=0; s2<LLs; s2++){
|
||||
|
||||
int lex = s2 + LLs*site;
|
||||
uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
|
||||
uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
|
||||
uint64_t a2 = (uint64_t) &psi[lex];
|
||||
|
||||
for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
|
||||
|
||||
if((s2+l)==0) {
|
||||
asm(
|
||||
VPREFETCH1(0,%2) VPREFETCH1(0,%1)
|
||||
VPREFETCH1(12,%2) VPREFETCH1(13,%2)
|
||||
VPREFETCH1(14,%2) VPREFETCH1(15,%2)
|
||||
VBCASTCDUP(0,%2,BCAST0)
|
||||
VBCASTCDUP(1,%2,BCAST1)
|
||||
VBCASTCDUP(2,%2,BCAST2)
|
||||
VBCASTCDUP(3,%2,BCAST3)
|
||||
VBCASTCDUP(4,%2,BCAST4) VMULMEM(0,%0,BCAST0,Chi_00)
|
||||
VBCASTCDUP(5,%2,BCAST5) VMULMEM(0,%0,BCAST1,Chi_01)
|
||||
VBCASTCDUP(6,%2,BCAST6) VMULMEM(0,%0,BCAST2,Chi_02)
|
||||
VBCASTCDUP(7,%2,BCAST7) VMULMEM(0,%0,BCAST3,Chi_10)
|
||||
VBCASTCDUP(8,%2,BCAST8) VMULMEM(0,%0,BCAST4,Chi_11)
|
||||
VBCASTCDUP(9,%2,BCAST9) VMULMEM(0,%0,BCAST5,Chi_12)
|
||||
VBCASTCDUP(10,%2,BCAST10) VMULMEM(0,%1,BCAST6,Chi_20)
|
||||
VBCASTCDUP(11,%2,BCAST11) VMULMEM(0,%1,BCAST7,Chi_21)
|
||||
VMULMEM(0,%1,BCAST8,Chi_22)
|
||||
VMULMEM(0,%1,BCAST9,Chi_30)
|
||||
VMULMEM(0,%1,BCAST10,Chi_31)
|
||||
VMULMEM(0,%1,BCAST11,Chi_32)
|
||||
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||
} else {
|
||||
asm(
|
||||
VBCASTCDUP(0,%2,BCAST0) VMADDMEM(0,%0,BCAST0,Chi_00)
|
||||
VBCASTCDUP(1,%2,BCAST1) VMADDMEM(0,%0,BCAST1,Chi_01)
|
||||
VBCASTCDUP(2,%2,BCAST2) VMADDMEM(0,%0,BCAST2,Chi_02)
|
||||
VBCASTCDUP(3,%2,BCAST3) VMADDMEM(0,%0,BCAST3,Chi_10)
|
||||
VBCASTCDUP(4,%2,BCAST4) VMADDMEM(0,%0,BCAST4,Chi_11)
|
||||
VBCASTCDUP(5,%2,BCAST5) VMADDMEM(0,%0,BCAST5,Chi_12)
|
||||
VBCASTCDUP(6,%2,BCAST6) VMADDMEM(0,%1,BCAST6,Chi_20)
|
||||
VBCASTCDUP(7,%2,BCAST7) VMADDMEM(0,%1,BCAST7,Chi_21)
|
||||
VBCASTCDUP(8,%2,BCAST8) VMADDMEM(0,%1,BCAST8,Chi_22)
|
||||
VBCASTCDUP(9,%2,BCAST9) VMADDMEM(0,%1,BCAST9,Chi_30)
|
||||
VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
|
||||
VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
|
||||
: : "r" (a0), "r" (a1), "r" (a2) );
|
||||
}
|
||||
|
||||
a0 = a0 + incr;
|
||||
a1 = a1 + incr;
|
||||
a2 = a2 + sizeof(typename Simd::scalar_type);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int lexa = s1+LLs*site;
|
||||
asm (
|
||||
VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01) VSTORE(2 ,%0,Chi_02)
|
||||
VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11) VSTORE(5 ,%0,Chi_12)
|
||||
VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21) VSTORE(8 ,%0,Chi_22)
|
||||
VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31) VSTORE(11,%0,Chi_32)
|
||||
: : "r" ((uint64_t)&chi[lexa]) : "memory" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef Chi_00
|
||||
#undef Chi_01
|
||||
#undef Chi_02
|
||||
#undef Chi_10
|
||||
#undef Chi_11
|
||||
#undef Chi_12
|
||||
#undef Chi_20
|
||||
#undef Chi_21
|
||||
#undef Chi_22
|
||||
#undef Chi_30
|
||||
#undef Chi_31
|
||||
#undef Chi_32
|
||||
|
||||
#undef BCAST0
|
||||
#undef BCAST1
|
||||
#undef BCAST2
|
||||
#undef BCAST3
|
||||
#undef BCAST4
|
||||
#undef BCAST5
|
||||
#undef BCAST6
|
||||
#undef BCAST7
|
||||
#undef BCAST8
|
||||
#undef BCAST9
|
||||
#undef BCAST10
|
||||
#undef BCAST11
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
// Z-mobius version
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
|
||||
exit(-1);
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi._grid->_rdimensions[0];
|
||||
int vol = psi._grid->oSites()/LLs;
|
||||
|
||||
chi.checkerboard = psi.checkerboard;
|
||||
|
||||
Vector<iSinglet<Simd>> Matp;
|
||||
Vector<iSinglet<Simd>> Matm;
|
||||
Vector<iSinglet<Simd>>* _Matp;
|
||||
Vector<iSinglet<Simd>>* _Matm;
|
||||
|
||||
// MooeeInternalCompute(dag,inv,Matp,Matm);
|
||||
if(inv && dag){
|
||||
_Matp = &this->MatpInvDag;
|
||||
_Matm = &this->MatmInvDag;
|
||||
}
|
||||
|
||||
if(inv && (!dag)){
|
||||
_Matp = &this->MatpInv;
|
||||
_Matm = &this->MatmInv;
|
||||
}
|
||||
|
||||
if(!inv){
|
||||
MooeeInternalCompute(dag, inv, Matp, Matm);
|
||||
_Matp = &Matp;
|
||||
_Matm = &Matm;
|
||||
}
|
||||
|
||||
assert(_Matp->size() == Ls*LLs);
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
if(switcheroo<Coeff_t>::iscomplex()){
|
||||
parallel_for(auto site=0; site<vol; site++){
|
||||
MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
|
||||
}
|
||||
} else {
|
||||
parallel_for(auto site=0; site<vol; site++){
|
||||
MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
|
||||
}
|
||||
}
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
#ifdef MOBIUS_EOFA_DPERP_VEC
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF);
|
||||
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF);
|
||||
INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH);
|
||||
|
||||
template void MobiusEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
template void MobiusEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
template void MobiusEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
|
||||
|
||||
#endif
|
||||
|
||||
}}
|
@ -1,95 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Field>
|
||||
class PauliVillarsSolverUnprec
|
||||
{
|
||||
public:
|
||||
ConjugateGradient<Field> & CG;
|
||||
PauliVillarsSolverUnprec( ConjugateGradient<Field> &_CG) : CG(_CG){};
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
|
||||
{
|
||||
RealD m = _Matrix.Mass();
|
||||
Field A (_Matrix.FermionGrid());
|
||||
|
||||
MdagMLinearOperator<Matrix,Field> HermOp(_Matrix);
|
||||
|
||||
_Matrix.SetMass(1.0);
|
||||
_Matrix.Mdag(src,A);
|
||||
CG(HermOp,A,sol);
|
||||
_Matrix.SetMass(m);
|
||||
};
|
||||
};
|
||||
|
||||
template<class Field,class SchurSolverType>
|
||||
class PauliVillarsSolverRBprec
|
||||
{
|
||||
public:
|
||||
SchurSolverType & SchurSolver;
|
||||
PauliVillarsSolverRBprec( SchurSolverType &_SchurSolver) : SchurSolver(_SchurSolver){};
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
|
||||
{
|
||||
RealD m = _Matrix.Mass();
|
||||
Field A (_Matrix.FermionGrid());
|
||||
|
||||
_Matrix.SetMass(1.0);
|
||||
SchurSolver(_Matrix,src,sol);
|
||||
_Matrix.SetMass(m);
|
||||
};
|
||||
};
|
||||
|
||||
template<class Field,class GaugeField>
|
||||
class PauliVillarsSolverFourierAccel
|
||||
{
|
||||
public:
|
||||
GaugeField & Umu;
|
||||
ConjugateGradient<Field> & CG;
|
||||
|
||||
PauliVillarsSolverFourierAccel(GaugeField &_Umu,ConjugateGradient<Field> &_CG) : Umu(_Umu), CG(_CG)
|
||||
{
|
||||
};
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix &_Matrix,const Field &src,Field &sol)
|
||||
{
|
||||
FourierAcceleratedPV<Field, Matrix, typename Matrix::GaugeField > faPV(_Matrix,Umu,CG) ;
|
||||
faPV.pvInv(src,sol);
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
}
|
@ -1,135 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
|
||||
private:
|
||||
PVinverter & PauliVillarsSolver;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// First cut works, 10 Oct 2018.
|
||||
//
|
||||
// Must form a plan to get this into production for Zmobius acceleration
|
||||
// of the Mobius exact AMA corrections.
|
||||
//
|
||||
// TODO : understand absence of contact term in eqns in Hantao's thesis
|
||||
// sol4 is contact term subtracted, but thesis & Brower's paper suggests not.
|
||||
//
|
||||
// Step 1: Localise PV inverse in a routine. [DONE]
|
||||
// Step 2: Schur based PV inverse [DONE]
|
||||
// Step 3: Fourier accelerated PV inverse [DONE]
|
||||
//
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
Reconstruct5DfromPhysical(PVinverter &_PauliVillarsSolver)
|
||||
: PauliVillarsSolver(_PauliVillarsSolver)
|
||||
{
|
||||
};
|
||||
|
||||
|
||||
template<class Matrix>
|
||||
void PV(Matrix &_Matrix,const Field &src,Field &sol)
|
||||
{
|
||||
RealD m = _Matrix.Mass();
|
||||
_Matrix.SetMass(1.0);
|
||||
_Matrix.M(src,sol);
|
||||
_Matrix.SetMass(m);
|
||||
}
|
||||
template<class Matrix>
|
||||
void PVdag(Matrix &_Matrix,const Field &src,Field &sol)
|
||||
{
|
||||
RealD m = _Matrix.Mass();
|
||||
_Matrix.SetMass(1.0);
|
||||
_Matrix.Mdag(src,sol);
|
||||
_Matrix.SetMass(m);
|
||||
}
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &sol4,const Field &src4, Field &sol5){
|
||||
|
||||
int Ls = _Matrix.Ls;
|
||||
|
||||
Field psi4(_Matrix.GaugeGrid());
|
||||
Field psi(_Matrix.FermionGrid());
|
||||
Field A (_Matrix.FermionGrid());
|
||||
Field B (_Matrix.FermionGrid());
|
||||
Field c (_Matrix.FermionGrid());
|
||||
|
||||
typedef typename Matrix::Coeff_t Coeff_t;
|
||||
|
||||
std::cout << GridLogMessage<< " ************************************************" << std::endl;
|
||||
std::cout << GridLogMessage<< " Reconstruct5Dprop: c.f. MADWF algorithm " << std::endl;
|
||||
std::cout << GridLogMessage<< " ************************************************" << std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
//Import source, include Dminus factors
|
||||
///////////////////////////////////////
|
||||
_Matrix.ImportPhysicalFermionSource(src4,B);
|
||||
|
||||
///////////////////////////////////////
|
||||
// Set up c from src4
|
||||
///////////////////////////////////////
|
||||
PauliVillarsSolver(_Matrix,B,A);
|
||||
_Matrix.Pdag(A,c);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Build Pdag PV^-1 Dm P [-sol4,c2,c3... cL]
|
||||
//////////////////////////////////////
|
||||
psi4 = - sol4;
|
||||
InsertSlice(psi4, psi, 0 , 0);
|
||||
for (int s=1;s<Ls;s++) {
|
||||
ExtractSlice(psi4,c,s,0);
|
||||
InsertSlice(psi4,psi,s,0);
|
||||
}
|
||||
|
||||
/////////////////////////////
|
||||
// Pdag PV^-1 Dm P
|
||||
/////////////////////////////
|
||||
_Matrix.P(psi,B);
|
||||
_Matrix.M(B,A);
|
||||
PauliVillarsSolver(_Matrix,A,B);
|
||||
_Matrix.Pdag(B,A);
|
||||
|
||||
//////////////////////////////
|
||||
// Reinsert surface prop
|
||||
//////////////////////////////
|
||||
InsertSlice(sol4,A,0,0);
|
||||
|
||||
//////////////////////////////
|
||||
// Convert from y back to x
|
||||
//////////////////////////////
|
||||
_Matrix.P(A,sol5);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: SchurDiagTwoKappa.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Christoph Lehner
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _SCHUR_DIAG_TWO_KAPPA_H
|
||||
#define _SCHUR_DIAG_TWO_KAPPA_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// This is specific to (Z)mobius fermions
|
||||
template<class Matrix, class Field>
|
||||
class KappaSimilarityTransform {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Matrix);
|
||||
std::vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag;
|
||||
|
||||
KappaSimilarityTransform (Matrix &zmob) {
|
||||
for (int i=0;i<(int)zmob.bs.size();i++) {
|
||||
Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) );
|
||||
kappa.push_back( k );
|
||||
kappaDag.push_back( conj(k) );
|
||||
kappaInv.push_back( 1.0 / k );
|
||||
kappaInvDag.push_back( 1.0 / conj(k) );
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj>
|
||||
void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) {
|
||||
GridBase *grid=out._grid;
|
||||
out.checkerboard = in.checkerboard;
|
||||
assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now
|
||||
int Ls = grid->_rdimensions[0];
|
||||
parallel_for(int ss=0;ss<grid->oSites();ss++){
|
||||
vobj tmp = s[ss % Ls]*in._odata[ss];
|
||||
vstream(out._odata[ss],tmp);
|
||||
}
|
||||
}
|
||||
|
||||
RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) {
|
||||
sscale(in,out,s);
|
||||
return norm2(out);
|
||||
}
|
||||
|
||||
virtual RealD M (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]); }
|
||||
virtual RealD MDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);}
|
||||
virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);}
|
||||
virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);}
|
||||
|
||||
};
|
||||
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagTwoKappaOperator : public SchurOperatorBase<Field> {
|
||||
public:
|
||||
KappaSimilarityTransform<Matrix, Field> _S;
|
||||
SchurDiagTwoOperator<Matrix, Field> _Mat;
|
||||
|
||||
SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {};
|
||||
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
|
||||
_S.MInv(in,out);
|
||||
_Mat.Mpc(out,tmp);
|
||||
return _S.M(tmp,out);
|
||||
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in._grid);
|
||||
|
||||
_S.MDag(in,out);
|
||||
_Mat.MpcDag(out,tmp);
|
||||
return _S.MInvDag(tmp,out);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,294 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric;
|
||||
int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute;
|
||||
|
||||
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
|
||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||
if (SE->_is_local ) { \
|
||||
if (SE->_permute) { \
|
||||
chi_p = χ \
|
||||
permute(chi, in._odata[SE->_offset], ptype); \
|
||||
} else { \
|
||||
chi_p = &in._odata[SE->_offset]; \
|
||||
} \
|
||||
} else { \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
} \
|
||||
multLink(Uchi, U._odata[sU], *chi_p, Dir);
|
||||
|
||||
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
|
||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||
if (SE->_is_local ) { \
|
||||
if (SE->_permute) { \
|
||||
chi_p = χ \
|
||||
permute(chi, in._odata[SE->_offset], ptype); \
|
||||
} else { \
|
||||
chi_p = &in._odata[SE->_offset]; \
|
||||
} \
|
||||
} else if ( st.same_node[Dir] ) { \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
} \
|
||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||
multLink(Uchi, U._odata[sU], *chi_p, Dir); \
|
||||
}
|
||||
|
||||
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
|
||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||
nmu++; \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
multLink(Uchi, U._odata[sU], *chi_p, Dir); \
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic implementation; move to different file?
|
||||
// Int, Ext, Int+Ext cases for comms overlap
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out, int dag) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
skew = 0;
|
||||
GENERIC_STENCIL_LEG(U,Xp,skew,Impl::multLink);
|
||||
GENERIC_STENCIL_LEG(U,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd);
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
if ( dag ) {
|
||||
Uchi = - Uchi;
|
||||
}
|
||||
vstream(out._odata[sF], Uchi);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Only contributions from interior of our node
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int skew ;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
skew = 0;
|
||||
Uchi=zero;
|
||||
GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd);
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
if ( dag ) {
|
||||
Uchi = - Uchi;
|
||||
}
|
||||
vstream(out._odata[sF], Uchi);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Only contributions from exterior of our node
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int nmu=0;
|
||||
int skew ;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
skew = 0;
|
||||
Uchi=zero;
|
||||
GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd);
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
|
||||
if ( nmu ) {
|
||||
if ( dag ) {
|
||||
out._odata[sF] = out._odata[sF] - Uchi;
|
||||
} else {
|
||||
out._odata[sF] = out._odata[sF] + Uchi;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Driving / wrapping routine to select right kernel
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
int dag=1;
|
||||
DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
int dag=0;
|
||||
DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out,
|
||||
int dag,int interior,int exterior)
|
||||
{
|
||||
switch(Opt) {
|
||||
#ifdef AVX512
|
||||
case OptInlineAsm:
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else {
|
||||
std::cout << GridLogError << "Cannot overlap comms and compute with Staggered assembly"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteHand (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( interior ) {
|
||||
DhopSiteHandInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( exterior ) {
|
||||
DhopSiteHandExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
}
|
||||
break;
|
||||
case OptGeneric:
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteGeneric (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( interior ) {
|
||||
DhopSiteGenericInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( exterior ) {
|
||||
DhopSiteGenericExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout<<"Oops Opt = "<<Opt<<std::endl;
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out, int dir, int disp)
|
||||
{
|
||||
// Disp should be either +1,-1,+3,-3
|
||||
// What about "dag" ?
|
||||
// Because we work out pU . dS/dU
|
||||
// U
|
||||
assert(0);
|
||||
}
|
||||
|
||||
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
|
||||
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
|
||||
|
||||
}}
|
||||
|
@ -1,122 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/StaggeredKernels.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi, Peter Boyle
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_STAGGERED_KERNELS_H
|
||||
#define GRID_QCD_STAGGERED_KERNELS_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper routines that implement Staggered stencil for a single site.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class StaggeredKernelsStatic {
|
||||
public:
|
||||
enum { OptGeneric, OptHandUnroll, OptInlineAsm };
|
||||
enum { CommsAndCompute, CommsThenCompute };
|
||||
static int Opt;
|
||||
static int Comms;
|
||||
};
|
||||
|
||||
template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , public StaggeredKernelsStatic {
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef FermionOperator<Impl> Base;
|
||||
|
||||
public:
|
||||
|
||||
void DhopDir(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic Nc kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Nc=3 specific kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Asm Nc=3 specific kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic interface; fan out to right routine
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out, int interior=1,int exterior=1);
|
||||
|
||||
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out, int interior=1,int exterior=1);
|
||||
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out, int dag, int interior,int exterior);
|
||||
|
||||
public:
|
||||
|
||||
StaggeredKernels(const ImplParams &p = ImplParams());
|
||||
|
||||
};
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
@ -1,399 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/StaggerdKernelsHand.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
|
||||
|
||||
#define LOAD_CHI(b) \
|
||||
const SiteSpinor & ref (b[offset]); \
|
||||
Chi_0=ref()()(0);\
|
||||
Chi_1=ref()()(1);\
|
||||
Chi_2=ref()()(2);
|
||||
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT(A,UChi) \
|
||||
auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
Impl::loadLinkElement(U_02,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_12,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_22,ref()(2,2)); \
|
||||
UChi ## _0 = U_00*Chi_0; \
|
||||
UChi ## _1 = U_10*Chi_0;\
|
||||
UChi ## _2 = U_20*Chi_0;\
|
||||
UChi ## _0 += U_01*Chi_1;\
|
||||
UChi ## _1 += U_11*Chi_1;\
|
||||
UChi ## _2 += U_21*Chi_1;\
|
||||
UChi ## _0 += U_02*Chi_2;\
|
||||
UChi ## _1 += U_12*Chi_2;\
|
||||
UChi ## _2 += U_22*Chi_2;
|
||||
|
||||
#define MULT_ADD(U,A,UChi) \
|
||||
auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
Impl::loadLinkElement(U_02,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_12,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_22,ref()(2,2)); \
|
||||
UChi ## _0 += U_00*Chi_0; \
|
||||
UChi ## _1 += U_10*Chi_0;\
|
||||
UChi ## _2 += U_20*Chi_0;\
|
||||
UChi ## _0 += U_01*Chi_1;\
|
||||
UChi ## _1 += U_11*Chi_1;\
|
||||
UChi ## _2 += U_21*Chi_1;\
|
||||
UChi ## _0 += U_02*Chi_2;\
|
||||
UChi ## _1 += U_12*Chi_2;\
|
||||
UChi ## _2 += U_22*Chi_2;
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_0,Chi_0); \
|
||||
permute##dir(Chi_1,Chi_1); \
|
||||
permute##dir(Chi_2,Chi_2);
|
||||
|
||||
|
||||
#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
|
||||
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHI(in._odata); \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(Perm); \
|
||||
} \
|
||||
} else { \
|
||||
LOAD_CHI(buf); \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even) \
|
||||
HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
|
||||
{ \
|
||||
MULT(Dir,even); \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG(U,Dir,Perm,skew,even) \
|
||||
HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
|
||||
{ \
|
||||
MULT_ADD(U,Dir,even); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even) \
|
||||
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHI(in._odata); \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(Perm); \
|
||||
} \
|
||||
} else if ( st.same_node[Dir] ) { \
|
||||
LOAD_CHI(buf); \
|
||||
} \
|
||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||
MULT_ADD(U,Dir,even); \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_EXT(U,Dir,Perm,skew,even) \
|
||||
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||
nmu++; \
|
||||
{ LOAD_CHI(buf); } \
|
||||
{ MULT_ADD(U,Dir,even); } \
|
||||
}
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
Simd even_0; // 12 regs on knc
|
||||
Simd even_1;
|
||||
Simd even_2;
|
||||
Simd odd_0; // 12 regs on knc
|
||||
Simd odd_1;
|
||||
Simd odd_2;
|
||||
|
||||
Simd Chi_0; // two spinor; 6 regs
|
||||
Simd Chi_1;
|
||||
Simd Chi_2;
|
||||
|
||||
Simd U_00; // two rows of U matrix
|
||||
Simd U_10;
|
||||
Simd U_20;
|
||||
Simd U_01;
|
||||
Simd U_11;
|
||||
Simd U_21; // 2 reg left.
|
||||
Simd U_02;
|
||||
Simd U_12;
|
||||
Simd U_22;
|
||||
|
||||
SiteSpinor result;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
|
||||
skew = 0;
|
||||
HAND_STENCIL_LEG_BEGIN(Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_BEGIN(Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG (U,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG (U,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG (U,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG (U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG (U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG (U,Tm,0,skew,odd);
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG(UUU,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG(UUU,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Tm,0,skew,odd);
|
||||
|
||||
if ( dag ) {
|
||||
result()()(0) = - even_0 - odd_0;
|
||||
result()()(1) = - even_1 - odd_1;
|
||||
result()()(2) = - even_2 - odd_2;
|
||||
} else {
|
||||
result()()(0) = even_0 + odd_0;
|
||||
result()()(1) = even_1 + odd_1;
|
||||
result()()(2) = even_2 + odd_2;
|
||||
}
|
||||
vstream(out._odata[sF],result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
Simd even_0; // 12 regs on knc
|
||||
Simd even_1;
|
||||
Simd even_2;
|
||||
Simd odd_0; // 12 regs on knc
|
||||
Simd odd_1;
|
||||
Simd odd_2;
|
||||
|
||||
Simd Chi_0; // two spinor; 6 regs
|
||||
Simd Chi_1;
|
||||
Simd Chi_2;
|
||||
|
||||
Simd U_00; // two rows of U matrix
|
||||
Simd U_10;
|
||||
Simd U_20;
|
||||
Simd U_01;
|
||||
Simd U_11;
|
||||
Simd U_21; // 2 reg left.
|
||||
Simd U_02;
|
||||
Simd U_12;
|
||||
Simd U_22;
|
||||
|
||||
SiteSpinor result;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
|
||||
even_0 = zero; even_1 = zero; even_2 = zero;
|
||||
odd_0 = zero; odd_1 = zero; odd_2 = zero;
|
||||
|
||||
skew = 0;
|
||||
HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(U,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(U,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd);
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(UUU,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(UUU,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd);
|
||||
|
||||
// Assume every site must be connected to at least one interior point. No 1^4 subvols.
|
||||
if ( dag ) {
|
||||
result()()(0) = - even_0 - odd_0;
|
||||
result()()(1) = - even_1 - odd_1;
|
||||
result()()(2) = - even_2 - odd_2;
|
||||
} else {
|
||||
result()()(0) = even_0 + odd_0;
|
||||
result()()(1) = even_1 + odd_1;
|
||||
result()()(2) = even_2 + odd_2;
|
||||
}
|
||||
vstream(out._odata[sF],result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
Simd even_0; // 12 regs on knc
|
||||
Simd even_1;
|
||||
Simd even_2;
|
||||
Simd odd_0; // 12 regs on knc
|
||||
Simd odd_1;
|
||||
Simd odd_2;
|
||||
|
||||
Simd Chi_0; // two spinor; 6 regs
|
||||
Simd Chi_1;
|
||||
Simd Chi_2;
|
||||
|
||||
Simd U_00; // two rows of U matrix
|
||||
Simd U_10;
|
||||
Simd U_20;
|
||||
Simd U_01;
|
||||
Simd U_11;
|
||||
Simd U_21; // 2 reg left.
|
||||
Simd U_02;
|
||||
Simd U_12;
|
||||
Simd U_22;
|
||||
|
||||
SiteSpinor result;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
|
||||
even_0 = zero; even_1 = zero; even_2 = zero;
|
||||
odd_0 = zero; odd_1 = zero; odd_2 = zero;
|
||||
int nmu=0;
|
||||
skew = 0;
|
||||
HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(U,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(U,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd);
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd);
|
||||
|
||||
// Add sum of all exterior connected stencil legs
|
||||
if ( nmu ) {
|
||||
if ( dag ) {
|
||||
result()()(0) = - even_0 - odd_0;
|
||||
result()()(1) = - even_1 - odd_1;
|
||||
result()()(2) = - even_2 - odd_2;
|
||||
} else {
|
||||
result()()(0) = even_0 + odd_0;
|
||||
result()()(1) = even_1 + odd_1;
|
||||
result()()(2) = even_2 + odd_2;
|
||||
}
|
||||
out._odata[sF] = out._odata[sF] + result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
|
||||
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU, \
|
||||
SiteSpinor *buf, int LLs, int sU, \
|
||||
const FermionField &in, FermionField &out, int dag); \
|
||||
\
|
||||
template void StaggeredKernels<IMPL>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU, \
|
||||
SiteSpinor *buf, int LLs, int sU, \
|
||||
const FermionField &in, FermionField &out, int dag); \
|
||||
\
|
||||
template void StaggeredKernels<IMPL>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU, \
|
||||
SiteSpinor *buf, int LLs, int sU, \
|
||||
const FermionField &in, FermionField &out, int dag); \
|
||||
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -1,243 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
//#include <Grid/Eigen/Dense>
|
||||
#include <Grid/qcd/spin/Dirac.h>
|
||||
|
||||
namespace Grid
|
||||
{
|
||||
namespace QCD
|
||||
{
|
||||
|
||||
// *NOT* EO
|
||||
template <class Impl>
|
||||
RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
|
||||
{
|
||||
FermionField temp(out._grid);
|
||||
|
||||
// Wilson term
|
||||
out.checkerboard = in.checkerboard;
|
||||
this->Dhop(in, out, DaggerNo);
|
||||
|
||||
// Clover term
|
||||
Mooee(in, temp);
|
||||
|
||||
out += temp;
|
||||
return norm2(out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
FermionField temp(out._grid);
|
||||
|
||||
// Wilson term
|
||||
out.checkerboard = in.checkerboard;
|
||||
this->Dhop(in, out, DaggerYes);
|
||||
|
||||
// Clover term
|
||||
MooeeDag(in, temp);
|
||||
|
||||
out += temp;
|
||||
return norm2(out);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
{
|
||||
WilsonFermion<Impl>::ImportGauge(_Umu);
|
||||
GridBase *grid = _Umu._grid;
|
||||
typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
|
||||
|
||||
// Compute the field strength terms mu>nu
|
||||
WilsonLoops<Impl>::FieldStrength(Bx, _Umu, Zdir, Ydir);
|
||||
WilsonLoops<Impl>::FieldStrength(By, _Umu, Zdir, Xdir);
|
||||
WilsonLoops<Impl>::FieldStrength(Bz, _Umu, Ydir, Xdir);
|
||||
WilsonLoops<Impl>::FieldStrength(Ex, _Umu, Tdir, Xdir);
|
||||
WilsonLoops<Impl>::FieldStrength(Ey, _Umu, Tdir, Ydir);
|
||||
WilsonLoops<Impl>::FieldStrength(Ez, _Umu, Tdir, Zdir);
|
||||
|
||||
// Compute the Clover Operator acting on Colour and Spin
|
||||
// multiply here by the clover coefficients for the anisotropy
|
||||
CloverTerm = fillCloverYZ(Bx) * csw_r;
|
||||
CloverTerm += fillCloverXZ(By) * csw_r;
|
||||
CloverTerm += fillCloverXY(Bz) * csw_r;
|
||||
CloverTerm += fillCloverXT(Ex) * csw_t;
|
||||
CloverTerm += fillCloverYT(Ey) * csw_t;
|
||||
CloverTerm += fillCloverZT(Ez) * csw_t;
|
||||
CloverTerm += diag_mass;
|
||||
|
||||
int lvol = _Umu._grid->lSites();
|
||||
int DimRep = Impl::Dimension;
|
||||
|
||||
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
|
||||
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
|
||||
|
||||
std::vector<int> lcoor;
|
||||
typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero;
|
||||
|
||||
for (int site = 0; site < lvol; site++)
|
||||
{
|
||||
grid->LocalIndexToLocalCoor(site, lcoor);
|
||||
EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
|
||||
peekLocalSite(Qx, CloverTerm, lcoor);
|
||||
Qxinv = zero;
|
||||
//if (csw!=0){
|
||||
for (int j = 0; j < Ns; j++)
|
||||
for (int k = 0; k < Ns; k++)
|
||||
for (int a = 0; a < DimRep; a++)
|
||||
for (int b = 0; b < DimRep; b++)
|
||||
EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b);
|
||||
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
|
||||
|
||||
EigenInvCloverOp = EigenCloverOp.inverse();
|
||||
//std::cout << EigenInvCloverOp << std::endl;
|
||||
for (int j = 0; j < Ns; j++)
|
||||
for (int k = 0; k < Ns; k++)
|
||||
for (int a = 0; a < DimRep; a++)
|
||||
for (int b = 0; b < DimRep; b++)
|
||||
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
|
||||
// if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
|
||||
// }
|
||||
pokeLocalSite(Qxinv, CloverTermInv, lcoor);
|
||||
}
|
||||
|
||||
// Separate the even and odd parts
|
||||
pickCheckerboard(Even, CloverTermEven, CloverTerm);
|
||||
pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
|
||||
|
||||
pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
|
||||
pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
|
||||
|
||||
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
|
||||
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
|
||||
|
||||
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
|
||||
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonCloverFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
|
||||
{
|
||||
this->MooeeInternal(in, out, DaggerNo, InverseNo);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonCloverFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
this->MooeeInternal(in, out, DaggerYes, InverseNo);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonCloverFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
|
||||
{
|
||||
this->MooeeInternal(in, out, DaggerNo, InverseYes);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonCloverFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
|
||||
{
|
||||
this->MooeeInternal(in, out, DaggerYes, InverseYes);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
|
||||
{
|
||||
out.checkerboard = in.checkerboard;
|
||||
CloverFieldType *Clover;
|
||||
assert(in.checkerboard == Odd || in.checkerboard == Even);
|
||||
|
||||
if (dag)
|
||||
{
|
||||
if (in._grid->_isCheckerBoarded)
|
||||
{
|
||||
if (in.checkerboard == Odd)
|
||||
{
|
||||
Clover = (inv) ? &CloverTermInvDagOdd : &CloverTermDagOdd;
|
||||
}
|
||||
else
|
||||
{
|
||||
Clover = (inv) ? &CloverTermInvDagEven : &CloverTermDagEven;
|
||||
}
|
||||
out = *Clover * in;
|
||||
}
|
||||
else
|
||||
{
|
||||
Clover = (inv) ? &CloverTermInv : &CloverTerm;
|
||||
out = adj(*Clover) * in;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (in._grid->_isCheckerBoarded)
|
||||
{
|
||||
|
||||
if (in.checkerboard == Odd)
|
||||
{
|
||||
// std::cout << "Calling clover term Odd" << std::endl;
|
||||
Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd;
|
||||
}
|
||||
else
|
||||
{
|
||||
// std::cout << "Calling clover term Even" << std::endl;
|
||||
Clover = (inv) ? &CloverTermInvEven : &CloverTermEven;
|
||||
}
|
||||
out = *Clover * in;
|
||||
// std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
Clover = (inv) ? &CloverTermInv : &CloverTerm;
|
||||
out = *Clover * in;
|
||||
}
|
||||
}
|
||||
|
||||
} // MooeeInternal
|
||||
|
||||
|
||||
// Derivative parts
|
||||
template <class Impl>
|
||||
void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
// Derivative parts
|
||||
template <class Impl>
|
||||
void WilsonCloverFermion<Impl>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
|
||||
{
|
||||
assert(0); // not implemented yet
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(WilsonCloverFermion);
|
||||
AdjointFermOpTemplateInstantiate(WilsonCloverFermion);
|
||||
TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion);
|
||||
//GparityFermOpTemplateInstantiate(WilsonCloverFermion);
|
||||
}
|
||||
}
|
@ -1,366 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
Author: David Preti <>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef GRID_QCD_WILSON_CLOVER_FERMION_H
|
||||
#define GRID_QCD_WILSON_CLOVER_FERMION_H
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
namespace Grid
|
||||
{
|
||||
namespace QCD
|
||||
{
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Wilson Clover
|
||||
//
|
||||
// Operator ( with anisotropy coefficients):
|
||||
//
|
||||
// Q = 1 + (Nd-1)/xi_0 + m
|
||||
// + W_t + (nu/xi_0) * W_s
|
||||
// - 1/2*[ csw_t * sum_s (sigma_ts F_ts) + (csw_s/xi_0) * sum_ss (sigma_ss F_ss) ]
|
||||
//
|
||||
// s spatial, t temporal directions.
|
||||
// where W_t and W_s are the temporal and spatial components of the
|
||||
// Wilson Dirac operator
|
||||
//
|
||||
// csw_r = csw_t to recover the isotropic version
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
class WilsonCloverFermion : public WilsonFermion<Impl>
|
||||
{
|
||||
public:
|
||||
// Types definitions
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
template <typename vtype>
|
||||
using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
|
||||
typedef iImplClover<Simd> SiteCloverType;
|
||||
typedef Lattice<SiteCloverType> CloverFieldType;
|
||||
|
||||
public:
|
||||
typedef WilsonFermion<Impl> WilsonBase;
|
||||
|
||||
virtual void Instantiatable(void){};
|
||||
// Constructors
|
||||
WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid,
|
||||
const RealD _mass,
|
||||
const RealD _csw_r = 0.0,
|
||||
const RealD _csw_t = 0.0,
|
||||
const WilsonAnisotropyCoefficients &clover_anisotropy = WilsonAnisotropyCoefficients(),
|
||||
const ImplParams &impl_p = ImplParams()) : WilsonFermion<Impl>(_Umu,
|
||||
Fgrid,
|
||||
Hgrid,
|
||||
_mass, impl_p, clover_anisotropy),
|
||||
CloverTerm(&Fgrid),
|
||||
CloverTermInv(&Fgrid),
|
||||
CloverTermEven(&Hgrid),
|
||||
CloverTermOdd(&Hgrid),
|
||||
CloverTermInvEven(&Hgrid),
|
||||
CloverTermInvOdd(&Hgrid),
|
||||
CloverTermDagEven(&Hgrid),
|
||||
CloverTermDagOdd(&Hgrid),
|
||||
CloverTermInvDagEven(&Hgrid),
|
||||
CloverTermInvDagOdd(&Hgrid)
|
||||
{
|
||||
assert(Nd == 4); // require 4 dimensions
|
||||
|
||||
if (clover_anisotropy.isAnisotropic)
|
||||
{
|
||||
csw_r = _csw_r * 0.5 / clover_anisotropy.xi_0;
|
||||
diag_mass = _mass + 1.0 + (Nd - 1) * (clover_anisotropy.nu / clover_anisotropy.xi_0);
|
||||
}
|
||||
else
|
||||
{
|
||||
csw_r = _csw_r * 0.5;
|
||||
diag_mass = 4.0 + _mass;
|
||||
}
|
||||
csw_t = _csw_t * 0.5;
|
||||
|
||||
if (csw_r == 0)
|
||||
std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw_r = 0" << std::endl;
|
||||
if (csw_t == 0)
|
||||
std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw_t = 0" << std::endl;
|
||||
|
||||
ImportGauge(_Umu);
|
||||
}
|
||||
|
||||
virtual RealD M(const FermionField &in, FermionField &out);
|
||||
virtual RealD Mdag(const FermionField &in, FermionField &out);
|
||||
|
||||
virtual void Mooee(const FermionField &in, FermionField &out);
|
||||
virtual void MooeeDag(const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInv(const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInvDag(const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv);
|
||||
|
||||
//virtual void MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
virtual void MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
virtual void MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
|
||||
|
||||
void ImportGauge(const GaugeField &_Umu);
|
||||
|
||||
// Derivative parts unpreconditioned pseudofermions
|
||||
void MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
|
||||
{
|
||||
conformable(X._grid, Y._grid);
|
||||
conformable(X._grid, force._grid);
|
||||
GaugeLinkField force_mu(force._grid), lambda(force._grid);
|
||||
GaugeField clover_force(force._grid);
|
||||
PropagatorField Lambda(force._grid);
|
||||
|
||||
// Guido: Here we are hitting some performance issues:
|
||||
// need to extract the components of the DoubledGaugeField
|
||||
// for each call
|
||||
// Possible solution
|
||||
// Create a vector object to store them? (cons: wasting space)
|
||||
std::vector<GaugeLinkField> U(Nd, this->Umu._grid);
|
||||
|
||||
Impl::extractLinkField(U, this->Umu);
|
||||
|
||||
force = zero;
|
||||
// Derivative of the Wilson hopping term
|
||||
this->DhopDeriv(force, X, Y, dag);
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Clover term derivative
|
||||
///////////////////////////////////////////////////////////
|
||||
Impl::outerProductImpl(Lambda, X, Y);
|
||||
//std::cout << "Lambda:" << Lambda << std::endl;
|
||||
|
||||
Gamma::Algebra sigma[] = {
|
||||
Gamma::Algebra::SigmaXY,
|
||||
Gamma::Algebra::SigmaXZ,
|
||||
Gamma::Algebra::SigmaXT,
|
||||
Gamma::Algebra::MinusSigmaXY,
|
||||
Gamma::Algebra::SigmaYZ,
|
||||
Gamma::Algebra::SigmaYT,
|
||||
Gamma::Algebra::MinusSigmaXZ,
|
||||
Gamma::Algebra::MinusSigmaYZ,
|
||||
Gamma::Algebra::SigmaZT,
|
||||
Gamma::Algebra::MinusSigmaXT,
|
||||
Gamma::Algebra::MinusSigmaYT,
|
||||
Gamma::Algebra::MinusSigmaZT};
|
||||
|
||||
/*
|
||||
sigma_{\mu \nu}=
|
||||
| 0 sigma[0] sigma[1] sigma[2] |
|
||||
| sigma[3] 0 sigma[4] sigma[5] |
|
||||
| sigma[6] sigma[7] 0 sigma[8] |
|
||||
| sigma[9] sigma[10] sigma[11] 0 |
|
||||
*/
|
||||
|
||||
int count = 0;
|
||||
clover_force = zero;
|
||||
for (int mu = 0; mu < 4; mu++)
|
||||
{
|
||||
force_mu = zero;
|
||||
for (int nu = 0; nu < 4; nu++)
|
||||
{
|
||||
if (mu == nu)
|
||||
continue;
|
||||
|
||||
RealD factor;
|
||||
if (nu == 4 || mu == 4)
|
||||
{
|
||||
factor = 2.0 * csw_t;
|
||||
}
|
||||
else
|
||||
{
|
||||
factor = 2.0 * csw_r;
|
||||
}
|
||||
PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
|
||||
Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok
|
||||
force_mu -= factor*Cmunu(U, lambda, mu, nu); // checked
|
||||
count++;
|
||||
}
|
||||
|
||||
pokeLorentz(clover_force, U[mu] * force_mu, mu);
|
||||
}
|
||||
//clover_force *= csw;
|
||||
force += clover_force;
|
||||
}
|
||||
|
||||
// Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis
|
||||
GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu)
|
||||
{
|
||||
conformable(lambda._grid, U[0]._grid);
|
||||
GaugeLinkField out(lambda._grid), tmp(lambda._grid);
|
||||
// insertion in upper staple
|
||||
// please check redundancy of shift operations
|
||||
|
||||
// C1+
|
||||
tmp = lambda * U[nu];
|
||||
out = Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu);
|
||||
|
||||
// C2+
|
||||
tmp = U[mu] * Impl::ShiftStaple(adj(lambda), mu);
|
||||
out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu);
|
||||
|
||||
// C3+
|
||||
tmp = U[nu] * Impl::ShiftStaple(adj(lambda), nu);
|
||||
out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu);
|
||||
|
||||
// C4+
|
||||
out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu) * lambda;
|
||||
|
||||
// insertion in lower staple
|
||||
// C1-
|
||||
out -= Impl::ShiftStaple(lambda, mu) * Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu);
|
||||
|
||||
// C2-
|
||||
tmp = adj(lambda) * U[nu];
|
||||
out -= Impl::ShiftStaple(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu);
|
||||
|
||||
// C3-
|
||||
tmp = lambda * U[nu];
|
||||
out -= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu);
|
||||
|
||||
// C4-
|
||||
out -= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu) * lambda;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
private:
|
||||
// here fixing the 4 dimensions, make it more general?
|
||||
|
||||
RealD csw_r; // Clover coefficient - spatial
|
||||
RealD csw_t; // Clover coefficient - temporal
|
||||
RealD diag_mass; // Mass term
|
||||
CloverFieldType CloverTerm, CloverTermInv; // Clover term
|
||||
CloverFieldType CloverTermEven, CloverTermOdd; // Clover term EO
|
||||
CloverFieldType CloverTermInvEven, CloverTermInvOdd; // Clover term Inv EO
|
||||
CloverFieldType CloverTermDagEven, CloverTermDagOdd; // Clover term Dag EO
|
||||
CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; // Clover term Inv Dag EO
|
||||
|
||||
// eventually these can be compressed into 6x6 blocks instead of the 12x12
|
||||
// using the DeGrand-Rossi basis for the gamma matrices
|
||||
CloverFieldType fillCloverYZ(const GaugeLinkField &F)
|
||||
{
|
||||
CloverFieldType T(F._grid);
|
||||
T = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
|
||||
{
|
||||
T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()());
|
||||
T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()());
|
||||
T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()());
|
||||
T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()());
|
||||
}
|
||||
|
||||
return T;
|
||||
}
|
||||
|
||||
CloverFieldType fillCloverXZ(const GaugeLinkField &F)
|
||||
{
|
||||
CloverFieldType T(F._grid);
|
||||
T = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
|
||||
{
|
||||
T._odata[i]()(0, 1) = -F._odata[i]()();
|
||||
T._odata[i]()(1, 0) = F._odata[i]()();
|
||||
T._odata[i]()(2, 3) = -F._odata[i]()();
|
||||
T._odata[i]()(3, 2) = F._odata[i]()();
|
||||
}
|
||||
|
||||
return T;
|
||||
}
|
||||
|
||||
CloverFieldType fillCloverXY(const GaugeLinkField &F)
|
||||
{
|
||||
CloverFieldType T(F._grid);
|
||||
T = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
|
||||
{
|
||||
|
||||
T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()());
|
||||
T._odata[i]()(1, 1) = timesI(F._odata[i]()());
|
||||
T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()());
|
||||
T._odata[i]()(3, 3) = timesI(F._odata[i]()());
|
||||
}
|
||||
|
||||
return T;
|
||||
}
|
||||
|
||||
CloverFieldType fillCloverXT(const GaugeLinkField &F)
|
||||
{
|
||||
CloverFieldType T(F._grid);
|
||||
T = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
|
||||
{
|
||||
T._odata[i]()(0, 1) = timesI(F._odata[i]()());
|
||||
T._odata[i]()(1, 0) = timesI(F._odata[i]()());
|
||||
T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()());
|
||||
T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()());
|
||||
}
|
||||
|
||||
return T;
|
||||
}
|
||||
|
||||
CloverFieldType fillCloverYT(const GaugeLinkField &F)
|
||||
{
|
||||
CloverFieldType T(F._grid);
|
||||
T = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
|
||||
{
|
||||
T._odata[i]()(0, 1) = -(F._odata[i]()());
|
||||
T._odata[i]()(1, 0) = (F._odata[i]()());
|
||||
T._odata[i]()(2, 3) = (F._odata[i]()());
|
||||
T._odata[i]()(3, 2) = -(F._odata[i]()());
|
||||
}
|
||||
|
||||
return T;
|
||||
}
|
||||
|
||||
CloverFieldType fillCloverZT(const GaugeLinkField &F)
|
||||
{
|
||||
CloverFieldType T(F._grid);
|
||||
T = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int i = 0; i < CloverTerm._grid->oSites(); i++)
|
||||
{
|
||||
T._odata[i]()(0, 0) = timesI(F._odata[i]()());
|
||||
T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()());
|
||||
T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()());
|
||||
T._odata[i]()(3, 3) = timesI(F._odata[i]()());
|
||||
}
|
||||
|
||||
return T;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // GRID_QCD_WILSON_CLOVER_FERMION_H
|
@ -1,373 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonCompressor.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_WILSON_COMPRESSOR_H
|
||||
#define GRID_QCD_WILSON_COMPRESSOR_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// optimised versions supporting half precision too
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class _HCspinor,class _Hspinor,class _Spinor, class projector,typename SFINAE = void >
|
||||
class WilsonCompressorTemplate;
|
||||
|
||||
|
||||
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
|
||||
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
|
||||
typename std::enable_if<std::is_same<_HCspinor,_Hspinor>::value>::type >
|
||||
{
|
||||
public:
|
||||
|
||||
int mu,dag;
|
||||
|
||||
void Point(int p) { mu=p; };
|
||||
|
||||
WilsonCompressorTemplate(int _dag=0){
|
||||
dag = _dag;
|
||||
}
|
||||
|
||||
typedef _Spinor SiteSpinor;
|
||||
typedef _Hspinor SiteHalfSpinor;
|
||||
typedef _HCspinor SiteHalfCommSpinor;
|
||||
typedef typename SiteHalfCommSpinor::vector_type vComplexLow;
|
||||
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
|
||||
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
|
||||
|
||||
inline int CommDatumSize(void) {
|
||||
return sizeof(SiteHalfCommSpinor);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Compress includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Compress(SiteHalfSpinor * __restrict__ buf,Integer o,const SiteSpinor &in) {
|
||||
SiteHalfSpinor tmp;
|
||||
projector::Proj(tmp,in,mu,dag);
|
||||
vstream(buf[o],tmp);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Exchange includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Exchange(SiteHalfSpinor * __restrict__ mp,
|
||||
const SiteHalfSpinor * __restrict__ vp0,
|
||||
const SiteHalfSpinor * __restrict__ vp1,
|
||||
Integer type,Integer o){
|
||||
SiteHalfSpinor tmp1;
|
||||
SiteHalfSpinor tmp2;
|
||||
exchange(tmp1,tmp2,vp0[o],vp1[o],type);
|
||||
vstream(mp[2*o ],tmp1);
|
||||
vstream(mp[2*o+1],tmp2);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Have a decompression step if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Decompress(SiteHalfSpinor * __restrict__ out,
|
||||
SiteHalfSpinor * __restrict__ in, Integer o) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Compress Exchange */
|
||||
/*****************************************************/
|
||||
inline void CompressExchange(SiteHalfSpinor * __restrict__ out0,
|
||||
SiteHalfSpinor * __restrict__ out1,
|
||||
const SiteSpinor * __restrict__ in,
|
||||
Integer j,Integer k, Integer m,Integer type){
|
||||
SiteHalfSpinor temp1, temp2,temp3,temp4;
|
||||
projector::Proj(temp1,in[k],mu,dag);
|
||||
projector::Proj(temp2,in[m],mu,dag);
|
||||
exchange(temp3,temp4,temp1,temp2,type);
|
||||
vstream(out0[j],temp3);
|
||||
vstream(out1[j],temp4);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Pass the info to the stencil */
|
||||
/*****************************************************/
|
||||
inline bool DecompressionStep(void) { return false; }
|
||||
|
||||
};
|
||||
|
||||
template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
|
||||
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
|
||||
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
|
||||
{
|
||||
public:
|
||||
|
||||
int mu,dag;
|
||||
|
||||
void Point(int p) { mu=p; };
|
||||
|
||||
WilsonCompressorTemplate(int _dag=0){
|
||||
dag = _dag;
|
||||
}
|
||||
|
||||
typedef _Spinor SiteSpinor;
|
||||
typedef _Hspinor SiteHalfSpinor;
|
||||
typedef _HCspinor SiteHalfCommSpinor;
|
||||
typedef typename SiteHalfCommSpinor::vector_type vComplexLow;
|
||||
typedef typename SiteHalfSpinor::vector_type vComplexHigh;
|
||||
constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
|
||||
|
||||
inline int CommDatumSize(void) {
|
||||
return sizeof(SiteHalfCommSpinor);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Compress includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) {
|
||||
SiteHalfSpinor hsp;
|
||||
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
|
||||
projector::Proj(hsp,in,mu,dag);
|
||||
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Exchange includes precision change if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Exchange(SiteHalfSpinor *mp,
|
||||
SiteHalfSpinor *vp0,
|
||||
SiteHalfSpinor *vp1,
|
||||
Integer type,Integer o){
|
||||
SiteHalfSpinor vt0,vt1;
|
||||
SiteHalfCommSpinor *vpp0 = (SiteHalfCommSpinor *)vp0;
|
||||
SiteHalfCommSpinor *vpp1 = (SiteHalfCommSpinor *)vp1;
|
||||
precisionChange((vComplexHigh *)&vt0,(vComplexLow *)&vpp0[o],Nw);
|
||||
precisionChange((vComplexHigh *)&vt1,(vComplexLow *)&vpp1[o],Nw);
|
||||
exchange(mp[2*o],mp[2*o+1],vt0,vt1,type);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Have a decompression step if mpi data is not same */
|
||||
/*****************************************************/
|
||||
inline void Decompress(SiteHalfSpinor *out,
|
||||
SiteHalfSpinor *in, Integer o){
|
||||
SiteHalfCommSpinor *hin=(SiteHalfCommSpinor *)in;
|
||||
precisionChange((vComplexHigh *)&out[o],(vComplexLow *)&hin[o],Nw);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Compress Exchange */
|
||||
/*****************************************************/
|
||||
inline void CompressExchange(SiteHalfSpinor *out0,
|
||||
SiteHalfSpinor *out1,
|
||||
const SiteSpinor *in,
|
||||
Integer j,Integer k, Integer m,Integer type){
|
||||
SiteHalfSpinor temp1, temp2,temp3,temp4;
|
||||
SiteHalfCommSpinor *hout0 = (SiteHalfCommSpinor *)out0;
|
||||
SiteHalfCommSpinor *hout1 = (SiteHalfCommSpinor *)out1;
|
||||
projector::Proj(temp1,in[k],mu,dag);
|
||||
projector::Proj(temp2,in[m],mu,dag);
|
||||
exchange(temp3,temp4,temp1,temp2,type);
|
||||
precisionChange((vComplexLow *)&hout0[j],(vComplexHigh *)&temp3,Nw);
|
||||
precisionChange((vComplexLow *)&hout1[j],(vComplexHigh *)&temp4,Nw);
|
||||
}
|
||||
|
||||
/*****************************************************/
|
||||
/* Pass the info to the stencil */
|
||||
/*****************************************************/
|
||||
inline bool DecompressionStep(void) { return true; }
|
||||
|
||||
};
|
||||
|
||||
#define DECLARE_PROJ(Projector,Compressor,spProj) \
|
||||
class Projector { \
|
||||
public: \
|
||||
template<class hsp,class fsp> \
|
||||
static void Proj(hsp &result,const fsp &in,int mu,int dag){ \
|
||||
spProj(result,in); \
|
||||
} \
|
||||
}; \
|
||||
template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
|
||||
|
||||
DECLARE_PROJ(WilsonXpProjector,WilsonXpCompressor,spProjXp);
|
||||
DECLARE_PROJ(WilsonYpProjector,WilsonYpCompressor,spProjYp);
|
||||
DECLARE_PROJ(WilsonZpProjector,WilsonZpCompressor,spProjZp);
|
||||
DECLARE_PROJ(WilsonTpProjector,WilsonTpCompressor,spProjTp);
|
||||
DECLARE_PROJ(WilsonXmProjector,WilsonXmCompressor,spProjXm);
|
||||
DECLARE_PROJ(WilsonYmProjector,WilsonYmCompressor,spProjYm);
|
||||
DECLARE_PROJ(WilsonZmProjector,WilsonZmCompressor,spProjZm);
|
||||
DECLARE_PROJ(WilsonTmProjector,WilsonTmCompressor,spProjTm);
|
||||
|
||||
class WilsonProjector {
|
||||
public:
|
||||
template<class hsp,class fsp>
|
||||
static void Proj(hsp &result,const fsp &in,int mu,int dag){
|
||||
int mudag=dag? mu : (mu+Nd)%(2*Nd);
|
||||
switch(mudag) {
|
||||
case Xp: spProjXp(result,in); break;
|
||||
case Yp: spProjYp(result,in); break;
|
||||
case Zp: spProjZp(result,in); break;
|
||||
case Tp: spProjTp(result,in); break;
|
||||
case Xm: spProjXm(result,in); break;
|
||||
case Ym: spProjYm(result,in); break;
|
||||
case Zm: spProjZm(result,in); break;
|
||||
case Tm: spProjTm(result,in); break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
}
|
||||
};
|
||||
template<typename HCS,typename HS,typename S> using WilsonCompressor = WilsonCompressorTemplate<HCS,HS,S,WilsonProjector>;
|
||||
|
||||
// Fast comms buffer manipulation which should inline right through (avoid direction
|
||||
// dependent logic that prevents inlining
|
||||
template<class vobj,class cobj>
|
||||
class WilsonStencil : public CartesianStencil<vobj,cobj> {
|
||||
public:
|
||||
double timer0;
|
||||
double timer1;
|
||||
double timer2;
|
||||
double timer3;
|
||||
double timer4;
|
||||
double timer5;
|
||||
double timer6;
|
||||
uint64_t callsi;
|
||||
void ZeroCountersi(void)
|
||||
{
|
||||
timer0=0;
|
||||
timer1=0;
|
||||
timer2=0;
|
||||
timer3=0;
|
||||
timer4=0;
|
||||
timer5=0;
|
||||
timer6=0;
|
||||
callsi=0;
|
||||
}
|
||||
void Reporti(int calls)
|
||||
{
|
||||
if ( timer0 ) std::cout << GridLogMessage << " timer0 (HaloGatherOpt) " <<timer0/calls <<std::endl;
|
||||
if ( timer1 ) std::cout << GridLogMessage << " timer1 (Communicate) " <<timer1/calls <<std::endl;
|
||||
if ( timer2 ) std::cout << GridLogMessage << " timer2 (CommsMerge ) " <<timer2/calls <<std::endl;
|
||||
if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " <<timer3/calls <<std::endl;
|
||||
if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl;
|
||||
}
|
||||
|
||||
WilsonStencil(GridBase *grid,
|
||||
int npoints,
|
||||
int checkerboard,
|
||||
const std::vector<int> &directions,
|
||||
const std::vector<int> &distances)
|
||||
: CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
|
||||
{
|
||||
ZeroCountersi();
|
||||
};
|
||||
|
||||
|
||||
template < class compressor>
|
||||
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||
this->HaloExchangeOptGather(source,compress);
|
||||
double t1=usecond();
|
||||
// Asynchronous MPI calls multidirectional, Isend etc...
|
||||
// this->CommunicateBegin(reqs);
|
||||
// this->CommunicateComplete(reqs);
|
||||
// Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways.
|
||||
this->Communicate();
|
||||
double t2=usecond(); timer1 += t2-t1;
|
||||
this->CommsMerge(compress);
|
||||
double t3=usecond(); timer2 += t3-t2;
|
||||
this->CommsMergeSHM(compress);
|
||||
double t4=usecond(); timer3 += t4-t3;
|
||||
}
|
||||
|
||||
template <class compressor>
|
||||
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
this->Prepare();
|
||||
double t0=usecond();
|
||||
this->HaloGatherOpt(source,compress);
|
||||
double t1=usecond();
|
||||
timer0 += t1-t0;
|
||||
callsi++;
|
||||
}
|
||||
|
||||
template <class compressor>
|
||||
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
// Strategy. Inherit types from Compressor.
|
||||
// Use types to select the write direction by directon compressor
|
||||
typedef typename compressor::SiteSpinor SiteSpinor;
|
||||
typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
|
||||
typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
|
||||
|
||||
this->mpi3synctime_g-=usecond();
|
||||
this->_grid->StencilBarrier();
|
||||
this->mpi3synctime_g+=usecond();
|
||||
|
||||
assert(source._grid==this->_grid);
|
||||
this->halogtime-=usecond();
|
||||
|
||||
this->u_comm_offset=0;
|
||||
|
||||
WilsonXpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XpCompress;
|
||||
WilsonYpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YpCompress;
|
||||
WilsonZpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZpCompress;
|
||||
WilsonTpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TpCompress;
|
||||
WilsonXmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XmCompress;
|
||||
WilsonYmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YmCompress;
|
||||
WilsonZmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZmCompress;
|
||||
WilsonTmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TmCompress;
|
||||
|
||||
int dag = compress.dag;
|
||||
int face_idx=0;
|
||||
if ( dag ) {
|
||||
assert(this->same_node[Xp]==this->HaloGatherDir(source,XpCompress,Xp,face_idx));
|
||||
assert(this->same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx));
|
||||
assert(this->same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
|
||||
assert(this->same_node[Tp]==this->HaloGatherDir(source,TpCompress,Tp,face_idx));
|
||||
assert(this->same_node[Xm]==this->HaloGatherDir(source,XmCompress,Xm,face_idx));
|
||||
assert(this->same_node[Ym]==this->HaloGatherDir(source,YmCompress,Ym,face_idx));
|
||||
assert(this->same_node[Zm]==this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
|
||||
assert(this->same_node[Tm]==this->HaloGatherDir(source,TmCompress,Tm,face_idx));
|
||||
} else {
|
||||
assert(this->same_node[Xp]==this->HaloGatherDir(source,XmCompress,Xp,face_idx));
|
||||
assert(this->same_node[Yp]==this->HaloGatherDir(source,YmCompress,Yp,face_idx));
|
||||
assert(this->same_node[Zp]==this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
|
||||
assert(this->same_node[Tp]==this->HaloGatherDir(source,TmCompress,Tp,face_idx));
|
||||
assert(this->same_node[Xm]==this->HaloGatherDir(source,XpCompress,Xm,face_idx));
|
||||
assert(this->same_node[Ym]==this->HaloGatherDir(source,YpCompress,Ym,face_idx));
|
||||
assert(this->same_node[Zm]==this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
|
||||
assert(this->same_node[Tm]==this->HaloGatherDir(source,TpCompress,Tm,face_idx));
|
||||
}
|
||||
this->face_table_computed=1;
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
this->halogtime+=usecond();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}} // namespace close
|
||||
#endif
|
@ -1,455 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
|
||||
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
|
||||
|
||||
template <class Impl>
|
||||
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Generic implementation; move to different file?
|
||||
////////////////////////////////////////////
|
||||
|
||||
#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
|
||||
SE = st.GetEntry(ptype, Dir, sF); \
|
||||
if (SE->_is_local) { \
|
||||
chi_p = χ \
|
||||
if (SE->_permute) { \
|
||||
spProj(tmp, in._odata[SE->_offset]); \
|
||||
permute(chi, tmp, ptype); \
|
||||
} else { \
|
||||
spProj(chi, in._odata[SE->_offset]); \
|
||||
} \
|
||||
} else { \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
} \
|
||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
|
||||
Recon(result, Uchi);
|
||||
|
||||
#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
|
||||
SE = st.GetEntry(ptype, Dir, sF); \
|
||||
if (SE->_is_local) { \
|
||||
chi_p = χ \
|
||||
if (SE->_permute) { \
|
||||
spProj(tmp, in._odata[SE->_offset]); \
|
||||
permute(chi, tmp, ptype); \
|
||||
} else { \
|
||||
spProj(chi, in._odata[SE->_offset]); \
|
||||
} \
|
||||
} else if ( st.same_node[Dir] ) { \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
} \
|
||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
|
||||
Recon(result, Uchi); \
|
||||
}
|
||||
|
||||
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
|
||||
SE = st.GetEntry(ptype, Dir, sF); \
|
||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \
|
||||
Recon(result, Uchi); \
|
||||
nmu++; \
|
||||
}
|
||||
|
||||
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
|
||||
if (gamma == Dir) { \
|
||||
if (SE->_is_local && SE->_permute) { \
|
||||
spProj(tmp, in._odata[SE->_offset]); \
|
||||
permute(chi, tmp, ptype); \
|
||||
} else if (SE->_is_local) { \
|
||||
spProj(chi, in._odata[SE->_offset]); \
|
||||
} else { \
|
||||
chi = buf[SE->_offset]; \
|
||||
} \
|
||||
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); \
|
||||
Recon(result, Uchi); \
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// All legs kernels ; comms then compute
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
SiteHalfSpinor Uchi;
|
||||
SiteSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
|
||||
GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp);
|
||||
GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp);
|
||||
GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp);
|
||||
GENERIC_STENCIL_LEG(Tp,spProjTp,accumReconTp);
|
||||
GENERIC_STENCIL_LEG(Xm,spProjXm,accumReconXm);
|
||||
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
|
||||
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
|
||||
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
|
||||
vstream(out._odata[sF], result);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
SiteHalfSpinor Uchi;
|
||||
SiteSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
|
||||
GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp);
|
||||
GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp);
|
||||
GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp);
|
||||
GENERIC_STENCIL_LEG(Tm,spProjTp,accumReconTp);
|
||||
GENERIC_STENCIL_LEG(Xp,spProjXm,accumReconXm);
|
||||
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
|
||||
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
|
||||
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
|
||||
vstream(out._odata[sF], result);
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Interior kernels
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
SiteHalfSpinor Uchi;
|
||||
SiteSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
|
||||
result=zero;
|
||||
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
|
||||
GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp);
|
||||
GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp);
|
||||
GENERIC_STENCIL_LEG_INT(Tp,spProjTp,accumReconTp);
|
||||
GENERIC_STENCIL_LEG_INT(Xm,spProjXm,accumReconXm);
|
||||
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
|
||||
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
|
||||
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
|
||||
vstream(out._odata[sF], result);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
SiteHalfSpinor Uchi;
|
||||
SiteSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
result=zero;
|
||||
GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp);
|
||||
GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp);
|
||||
GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp);
|
||||
GENERIC_STENCIL_LEG_INT(Tm,spProjTp,accumReconTp);
|
||||
GENERIC_STENCIL_LEG_INT(Xp,spProjXm,accumReconXm);
|
||||
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
|
||||
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
|
||||
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
|
||||
vstream(out._odata[sF], result);
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Exterior kernels
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
SiteHalfSpinor Uchi;
|
||||
SiteSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int nmu=0;
|
||||
result=zero;
|
||||
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
|
||||
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
|
||||
GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp);
|
||||
GENERIC_STENCIL_LEG_EXT(Tp,spProjTp,accumReconTp);
|
||||
GENERIC_STENCIL_LEG_EXT(Xm,spProjXm,accumReconXm);
|
||||
GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm);
|
||||
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
|
||||
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
|
||||
if ( nmu ) {
|
||||
out._odata[sF] = out._odata[sF] + result;
|
||||
}
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
SiteHalfSpinor Uchi;
|
||||
SiteSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int nmu=0;
|
||||
result=zero;
|
||||
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
|
||||
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
|
||||
GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp);
|
||||
GENERIC_STENCIL_LEG_EXT(Tm,spProjTp,accumReconTp);
|
||||
GENERIC_STENCIL_LEG_EXT(Xp,spProjXm,accumReconXm);
|
||||
GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm);
|
||||
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
|
||||
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
|
||||
if ( nmu ) {
|
||||
out._odata[sF] = out._odata[sF] + result;
|
||||
}
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
|
||||
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteSpinor result;
|
||||
SiteHalfSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
|
||||
SE = st.GetEntry(ptype, dir, sF);
|
||||
GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp);
|
||||
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
|
||||
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
|
||||
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
|
||||
GENERIC_DHOPDIR_LEG(Xm,spProjXm,spReconXm);
|
||||
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
|
||||
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
|
||||
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
|
||||
vstream(out._odata[sF], result);
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Conserved current utilities for Wilson fermions, for contracting propagators
|
||||
* to make a conserved current sink or inserting the conserved current
|
||||
* sequentially. Common to both 4D and 5D.
|
||||
******************************************************************************/
|
||||
// N.B. Functions below assume a -1/2 factor within U.
|
||||
#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr))
|
||||
#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr))
|
||||
|
||||
/*******************************************************************************
|
||||
* Name: ContractConservedCurrentSiteFwd
|
||||
* Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu]
|
||||
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||
* - Pass in q_in_1 shifted in +ve mu direction.
|
||||
******************************************************************************/
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(
|
||||
const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result, tmp;
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu);
|
||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
|
||||
if (switch_sign)
|
||||
{
|
||||
q_out -= result;
|
||||
}
|
||||
else
|
||||
{
|
||||
q_out += result;
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Name: ContractConservedCurrentSiteBwd
|
||||
* Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x]
|
||||
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||
* - Pass in q_in_2 shifted in +ve mu direction.
|
||||
******************************************************************************/
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(
|
||||
const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result, tmp;
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu + Nd);
|
||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
|
||||
if (switch_sign)
|
||||
{
|
||||
q_out += result;
|
||||
}
|
||||
else
|
||||
{
|
||||
q_out -= result;
|
||||
}
|
||||
}
|
||||
|
||||
// G-parity requires more specialised implementation.
|
||||
#define NO_CURR_SITE(Impl) \
|
||||
template <> \
|
||||
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd( \
|
||||
const SitePropagator &q_in_1, \
|
||||
const SitePropagator &q_in_2, \
|
||||
SitePropagator &q_out, \
|
||||
DoubledGaugeField &U, \
|
||||
unsigned int sU, \
|
||||
unsigned int mu, \
|
||||
bool switch_sign) \
|
||||
{ \
|
||||
assert(0); \
|
||||
} \
|
||||
template <> \
|
||||
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd( \
|
||||
const SitePropagator &q_in_1, \
|
||||
const SitePropagator &q_in_2, \
|
||||
SitePropagator &q_out, \
|
||||
DoubledGaugeField &U, \
|
||||
unsigned int mu, \
|
||||
unsigned int sU, \
|
||||
bool switch_sign) \
|
||||
{ \
|
||||
assert(0); \
|
||||
}
|
||||
|
||||
NO_CURR_SITE(GparityWilsonImplF);
|
||||
NO_CURR_SITE(GparityWilsonImplD);
|
||||
NO_CURR_SITE(GparityWilsonImplFH);
|
||||
NO_CURR_SITE(GparityWilsonImplDF);
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
* Name: SeqConservedCurrentSiteFwd
|
||||
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
|
||||
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||
* - Pass in q_in shifted in +ve mu direction.
|
||||
******************************************************************************/
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vInteger t_mask,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result;
|
||||
Impl::multLinkProp(result, U._odata[sU], q_in, mu);
|
||||
result = WilsonCurrentFwd(result, mu);
|
||||
|
||||
// Zero any unwanted timeslice entries.
|
||||
result = predicatedWhere(t_mask, result, 0.*result);
|
||||
|
||||
if (switch_sign)
|
||||
{
|
||||
q_out -= result;
|
||||
}
|
||||
else
|
||||
{
|
||||
q_out += result;
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Name: SeqConservedCurrentSiteFwd
|
||||
* Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu]
|
||||
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
|
||||
* - Pass in q_in shifted in -ve mu direction.
|
||||
******************************************************************************/
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vInteger t_mask,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result;
|
||||
Impl::multLinkProp(result, U._odata[sU], q_in, mu + Nd);
|
||||
result = WilsonCurrentBwd(result, mu);
|
||||
|
||||
// Zero any unwanted timeslice entries.
|
||||
result = predicatedWhere(t_mask, result, 0.*result);
|
||||
|
||||
if (switch_sign)
|
||||
{
|
||||
q_out += result;
|
||||
}
|
||||
else
|
||||
{
|
||||
q_out -= result;
|
||||
}
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(WilsonKernels);
|
||||
AdjointFermOpTemplateInstantiate(WilsonKernels);
|
||||
TwoIndexFermOpTemplateInstantiate(WilsonKernels);
|
||||
|
||||
}}
|
||||
|
@ -1,281 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_DHOP_H
|
||||
#define GRID_QCD_DHOP_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper routines that implement Wilson stencil for a single site.
|
||||
// Common to both the WilsonFermion and WilsonFermion5D
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class WilsonKernelsStatic {
|
||||
public:
|
||||
enum { OptGeneric, OptHandUnroll, OptInlineAsm };
|
||||
enum { CommsAndCompute, CommsThenCompute };
|
||||
static int Opt;
|
||||
static int Comms;
|
||||
};
|
||||
|
||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef FermionOperator<Impl> Base;
|
||||
|
||||
public:
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<Impl::isFundamental==true && Nc == 3 &&EnableBool, void>::type
|
||||
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
||||
{
|
||||
bgq_l1p_optimisation(1);
|
||||
switch(Opt) {
|
||||
#if defined(AVX512) || defined (QPX)
|
||||
case OptInlineAsm:
|
||||
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
else assert(0);
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
break;
|
||||
case OptGeneric:
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
bgq_l1p_optimisation(0);
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool, void>::type
|
||||
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
|
||||
// no kernel choice
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<Impl::isFundamental==true && Nc == 3 && EnableBool,void>::type
|
||||
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
||||
{
|
||||
bgq_l1p_optimisation(1);
|
||||
switch(Opt) {
|
||||
#if defined(AVX512) || defined (QPX)
|
||||
case OptInlineAsm:
|
||||
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
else assert(0);
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::HandDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
break;
|
||||
case OptGeneric:
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
bgq_l1p_optimisation(0);
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool,void>::type
|
||||
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
|
||||
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
|
||||
void DhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Utilities for inserting Wilson conserved current.
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign = false);
|
||||
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign = false);
|
||||
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vInteger t_mask,
|
||||
bool switch_sign = false);
|
||||
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vInteger t_mask,
|
||||
bool switch_sign = false);
|
||||
|
||||
private:
|
||||
// Specialised variants
|
||||
void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
|
||||
void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
||||
|
||||
void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||
|
||||
void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
||||
|
||||
void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||
|
||||
void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
||||
|
||||
void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||
|
||||
|
||||
void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
public:
|
||||
|
||||
WilsonKernels(const ImplParams &p = ImplParams());
|
||||
|
||||
};
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
@ -1,196 +0,0 @@
|
||||
#ifdef KERNEL_DAG
|
||||
#define DIR0_PROJMEM(base) XP_PROJMEM(base);
|
||||
#define DIR1_PROJMEM(base) YP_PROJMEM(base);
|
||||
#define DIR2_PROJMEM(base) ZP_PROJMEM(base);
|
||||
#define DIR3_PROJMEM(base) TP_PROJMEM(base);
|
||||
#define DIR4_PROJMEM(base) XM_PROJMEM(base);
|
||||
#define DIR5_PROJMEM(base) YM_PROJMEM(base);
|
||||
#define DIR6_PROJMEM(base) ZM_PROJMEM(base);
|
||||
#define DIR7_PROJMEM(base) TM_PROJMEM(base);
|
||||
#define DIR0_RECON XP_RECON
|
||||
#define DIR1_RECON YP_RECON_ACCUM
|
||||
#define DIR2_RECON ZP_RECON_ACCUM
|
||||
#define DIR3_RECON TP_RECON_ACCUM
|
||||
#define DIR4_RECON XM_RECON_ACCUM
|
||||
#define DIR5_RECON YM_RECON_ACCUM
|
||||
#define DIR6_RECON ZM_RECON_ACCUM
|
||||
#define DIR7_RECON TM_RECON_ACCUM
|
||||
#else
|
||||
#define DIR0_PROJMEM(base) XM_PROJMEM(base);
|
||||
#define DIR1_PROJMEM(base) YM_PROJMEM(base);
|
||||
#define DIR2_PROJMEM(base) ZM_PROJMEM(base);
|
||||
#define DIR3_PROJMEM(base) TM_PROJMEM(base);
|
||||
#define DIR4_PROJMEM(base) XP_PROJMEM(base);
|
||||
#define DIR5_PROJMEM(base) YP_PROJMEM(base);
|
||||
#define DIR6_PROJMEM(base) ZP_PROJMEM(base);
|
||||
#define DIR7_PROJMEM(base) TP_PROJMEM(base);
|
||||
#define DIR0_RECON XM_RECON
|
||||
#define DIR1_RECON YM_RECON_ACCUM
|
||||
#define DIR2_RECON ZM_RECON_ACCUM
|
||||
#define DIR3_RECON TM_RECON_ACCUM
|
||||
#define DIR4_RECON XP_RECON_ACCUM
|
||||
#define DIR5_RECON YP_RECON_ACCUM
|
||||
#define DIR6_RECON ZP_RECON_ACCUM
|
||||
#define DIR7_RECON TP_RECON_ACCUM
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Comms then compute kernel
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef INTERIOR_AND_EXTERIOR
|
||||
|
||||
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
|
||||
basep = st.GetPFInfo(nent,plocal); nent++; \
|
||||
if ( local ) { \
|
||||
LOAD64(%r10,isigns); \
|
||||
PROJ(base); \
|
||||
MAYBEPERM(PERMUTE_DIR,perm); \
|
||||
} else { \
|
||||
LOAD_CHI(base); \
|
||||
} \
|
||||
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
|
||||
PREFETCH_CHIMU(base); \
|
||||
MULT_2SPIN_DIR_PF(Dir,basep); \
|
||||
LOAD64(%r10,isigns); \
|
||||
RECON; \
|
||||
|
||||
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
|
||||
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
|
||||
PF_GAUGE(Xp); \
|
||||
PREFETCH1_CHIMU(base); \
|
||||
ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
|
||||
|
||||
#define RESULT(base,basep) SAVE_RESULT(base,basep);
|
||||
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Pre comms kernel -- prefetch like normal because it is mostly right
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef INTERIOR
|
||||
|
||||
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
|
||||
basep = st.GetPFInfo(nent,plocal); nent++; \
|
||||
if ( local ) { \
|
||||
LOAD64(%r10,isigns); \
|
||||
PROJ(base); \
|
||||
MAYBEPERM(PERMUTE_DIR,perm); \
|
||||
}else if ( st.same_node[Dir] ) {LOAD_CHI(base);} \
|
||||
if ( local || st.same_node[Dir] ) { \
|
||||
MULT_2SPIN_DIR_PF(Dir,basep); \
|
||||
LOAD64(%r10,isigns); \
|
||||
RECON; \
|
||||
} \
|
||||
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
|
||||
PREFETCH_CHIMU(base); \
|
||||
|
||||
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
|
||||
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
|
||||
PF_GAUGE(Xp); \
|
||||
PREFETCH1_CHIMU(base); \
|
||||
{ ZERO_PSI; } \
|
||||
ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
|
||||
|
||||
#define RESULT(base,basep) SAVE_RESULT(base,basep);
|
||||
|
||||
#endif
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Post comms kernel
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef EXTERIOR
|
||||
|
||||
|
||||
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
|
||||
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
|
||||
if((!local)&&(!st.same_node[Dir]) ) { \
|
||||
LOAD_CHI(base); \
|
||||
MULT_2SPIN_DIR_PF(Dir,base); \
|
||||
LOAD64(%r10,isigns); \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
|
||||
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
|
||||
nmu=0; \
|
||||
{ ZERO_PSI;} \
|
||||
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
|
||||
if((!local)&&(!st.same_node[Dir]) ) { \
|
||||
LOAD_CHI(base); \
|
||||
MULT_2SPIN_DIR_PF(Dir,base); \
|
||||
LOAD64(%r10,isigns); \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
|
||||
#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);}
|
||||
|
||||
#endif
|
||||
{
|
||||
int nmu;
|
||||
int local,perm, ptype;
|
||||
uint64_t base;
|
||||
uint64_t basep;
|
||||
const uint64_t plocal =(uint64_t) & in._odata[0];
|
||||
|
||||
COMPLEX_SIGNS(isigns);
|
||||
MASK_REGS;
|
||||
int nmax=U._grid->oSites();
|
||||
for(int site=0;site<Ns;site++) {
|
||||
#ifndef EXTERIOR
|
||||
int sU =lo.Reorder(ssU);
|
||||
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
|
||||
int sUn=lo.Reorder(ssn);
|
||||
LOCK_GAUGE(0);
|
||||
#else
|
||||
int sU =ssU;
|
||||
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
|
||||
int sUn=ssn;
|
||||
#endif
|
||||
for(int s=0;s<Ls;s++) {
|
||||
ss =sU*Ls+s;
|
||||
ssn=sUn*Ls+s;
|
||||
int ent=ss*8;// 2*Ndim
|
||||
int nent=ssn*8;
|
||||
|
||||
ASM_LEG_XP(Xp,Yp,PERMUTE_DIR3,DIR0_PROJMEM,DIR0_RECON);
|
||||
ASM_LEG(Yp,Zp,PERMUTE_DIR2,DIR1_PROJMEM,DIR1_RECON);
|
||||
ASM_LEG(Zp,Tp,PERMUTE_DIR1,DIR2_PROJMEM,DIR2_RECON);
|
||||
ASM_LEG(Tp,Xm,PERMUTE_DIR0,DIR3_PROJMEM,DIR3_RECON);
|
||||
|
||||
ASM_LEG(Xm,Ym,PERMUTE_DIR3,DIR4_PROJMEM,DIR4_RECON);
|
||||
ASM_LEG(Ym,Zm,PERMUTE_DIR2,DIR5_PROJMEM,DIR5_RECON);
|
||||
ASM_LEG(Zm,Tm,PERMUTE_DIR1,DIR6_PROJMEM,DIR6_RECON);
|
||||
ASM_LEG(Tm,Xp,PERMUTE_DIR0,DIR7_PROJMEM,DIR7_RECON);
|
||||
|
||||
#ifdef EXTERIOR
|
||||
if (nmu==0) break;
|
||||
// if (nmu!=0) std::cout << "EXT "<<sU<<std::endl;
|
||||
#endif
|
||||
base = (uint64_t) &out._odata[ss];
|
||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||
RESULT(base,basep);
|
||||
}
|
||||
ssU++;
|
||||
UNLOCK_GAUGE(0);
|
||||
}
|
||||
}
|
||||
|
||||
#undef DIR0_PROJMEM
|
||||
#undef DIR1_PROJMEM
|
||||
#undef DIR2_PROJMEM
|
||||
#undef DIR3_PROJMEM
|
||||
#undef DIR4_PROJMEM
|
||||
#undef DIR5_PROJMEM
|
||||
#undef DIR6_PROJMEM
|
||||
#undef DIR7_PROJMEM
|
||||
#undef DIR0_RECON
|
||||
#undef DIR1_RECON
|
||||
#undef DIR2_RECON
|
||||
#undef DIR3_RECON
|
||||
#undef DIR4_RECON
|
||||
#undef DIR5_RECON
|
||||
#undef DIR6_RECON
|
||||
#undef DIR7_RECON
|
||||
#undef ASM_LEG
|
||||
#undef ASM_LEG_XP
|
||||
#undef RESULT
|
@ -1,631 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernelsHand.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
#define REGISTER
|
||||
|
||||
#define LOAD_CHIMU \
|
||||
{const SiteSpinor & ref (in._odata[offset]); \
|
||||
Chimu_00=ref()(0)(0);\
|
||||
Chimu_01=ref()(0)(1);\
|
||||
Chimu_02=ref()(0)(2);\
|
||||
Chimu_10=ref()(1)(0);\
|
||||
Chimu_11=ref()(1)(1);\
|
||||
Chimu_12=ref()(1)(2);\
|
||||
Chimu_20=ref()(2)(0);\
|
||||
Chimu_21=ref()(2)(1);\
|
||||
Chimu_22=ref()(2)(2);\
|
||||
Chimu_30=ref()(3)(0);\
|
||||
Chimu_31=ref()(3)(1);\
|
||||
Chimu_32=ref()(3)(2);}
|
||||
|
||||
#define LOAD_CHI\
|
||||
{const SiteHalfSpinor &ref(buf[offset]); \
|
||||
Chi_00 = ref()(0)(0);\
|
||||
Chi_01 = ref()(0)(1);\
|
||||
Chi_02 = ref()(0)(2);\
|
||||
Chi_10 = ref()(1)(0);\
|
||||
Chi_11 = ref()(1)(1);\
|
||||
Chi_12 = ref()(1)(2);}
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT_2SPIN(A)\
|
||||
{auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00;\
|
||||
UChi_10 = U_00*Chi_10;\
|
||||
UChi_01 = U_10*Chi_00;\
|
||||
UChi_11 = U_10*Chi_10;\
|
||||
UChi_02 = U_20*Chi_00;\
|
||||
UChi_12 = U_20*Chi_10;\
|
||||
UChi_00+= U_01*Chi_01;\
|
||||
UChi_10+= U_01*Chi_11;\
|
||||
UChi_01+= U_11*Chi_01;\
|
||||
UChi_11+= U_11*Chi_11;\
|
||||
UChi_02+= U_21*Chi_01;\
|
||||
UChi_12+= U_21*Chi_11;\
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02;\
|
||||
UChi_10+= U_00*Chi_12;\
|
||||
UChi_01+= U_10*Chi_02;\
|
||||
UChi_11+= U_10*Chi_12;\
|
||||
UChi_02+= U_20*Chi_02;\
|
||||
UChi_12+= U_20*Chi_12;}
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_00,Chi_00);\
|
||||
permute##dir(Chi_01,Chi_01);\
|
||||
permute##dir(Chi_02,Chi_02);\
|
||||
permute##dir(Chi_10,Chi_10);\
|
||||
permute##dir(Chi_11,Chi_11);\
|
||||
permute##dir(Chi_12,Chi_12);
|
||||
|
||||
// hspin(0)=fspin(0)+timesI(fspin(3));
|
||||
// hspin(1)=fspin(1)+timesI(fspin(2));
|
||||
#define XP_PROJ \
|
||||
Chi_00 = Chimu_00+timesI(Chimu_30);\
|
||||
Chi_01 = Chimu_01+timesI(Chimu_31);\
|
||||
Chi_02 = Chimu_02+timesI(Chimu_32);\
|
||||
Chi_10 = Chimu_10+timesI(Chimu_20);\
|
||||
Chi_11 = Chimu_11+timesI(Chimu_21);\
|
||||
Chi_12 = Chimu_12+timesI(Chimu_22);
|
||||
|
||||
#define YP_PROJ \
|
||||
Chi_00 = Chimu_00-Chimu_30;\
|
||||
Chi_01 = Chimu_01-Chimu_31;\
|
||||
Chi_02 = Chimu_02-Chimu_32;\
|
||||
Chi_10 = Chimu_10+Chimu_20;\
|
||||
Chi_11 = Chimu_11+Chimu_21;\
|
||||
Chi_12 = Chimu_12+Chimu_22;
|
||||
|
||||
#define ZP_PROJ \
|
||||
Chi_00 = Chimu_00+timesI(Chimu_20); \
|
||||
Chi_01 = Chimu_01+timesI(Chimu_21); \
|
||||
Chi_02 = Chimu_02+timesI(Chimu_22); \
|
||||
Chi_10 = Chimu_10-timesI(Chimu_30); \
|
||||
Chi_11 = Chimu_11-timesI(Chimu_31); \
|
||||
Chi_12 = Chimu_12-timesI(Chimu_32);
|
||||
|
||||
#define TP_PROJ \
|
||||
Chi_00 = Chimu_00+Chimu_20; \
|
||||
Chi_01 = Chimu_01+Chimu_21; \
|
||||
Chi_02 = Chimu_02+Chimu_22; \
|
||||
Chi_10 = Chimu_10+Chimu_30; \
|
||||
Chi_11 = Chimu_11+Chimu_31; \
|
||||
Chi_12 = Chimu_12+Chimu_32;
|
||||
|
||||
|
||||
// hspin(0)=fspin(0)-timesI(fspin(3));
|
||||
// hspin(1)=fspin(1)-timesI(fspin(2));
|
||||
#define XM_PROJ \
|
||||
Chi_00 = Chimu_00-timesI(Chimu_30);\
|
||||
Chi_01 = Chimu_01-timesI(Chimu_31);\
|
||||
Chi_02 = Chimu_02-timesI(Chimu_32);\
|
||||
Chi_10 = Chimu_10-timesI(Chimu_20);\
|
||||
Chi_11 = Chimu_11-timesI(Chimu_21);\
|
||||
Chi_12 = Chimu_12-timesI(Chimu_22);
|
||||
|
||||
#define YM_PROJ \
|
||||
Chi_00 = Chimu_00+Chimu_30;\
|
||||
Chi_01 = Chimu_01+Chimu_31;\
|
||||
Chi_02 = Chimu_02+Chimu_32;\
|
||||
Chi_10 = Chimu_10-Chimu_20;\
|
||||
Chi_11 = Chimu_11-Chimu_21;\
|
||||
Chi_12 = Chimu_12-Chimu_22;
|
||||
|
||||
#define ZM_PROJ \
|
||||
Chi_00 = Chimu_00-timesI(Chimu_20); \
|
||||
Chi_01 = Chimu_01-timesI(Chimu_21); \
|
||||
Chi_02 = Chimu_02-timesI(Chimu_22); \
|
||||
Chi_10 = Chimu_10+timesI(Chimu_30); \
|
||||
Chi_11 = Chimu_11+timesI(Chimu_31); \
|
||||
Chi_12 = Chimu_12+timesI(Chimu_32);
|
||||
|
||||
#define TM_PROJ \
|
||||
Chi_00 = Chimu_00-Chimu_20; \
|
||||
Chi_01 = Chimu_01-Chimu_21; \
|
||||
Chi_02 = Chimu_02-Chimu_22; \
|
||||
Chi_10 = Chimu_10-Chimu_30; \
|
||||
Chi_11 = Chimu_11-Chimu_31; \
|
||||
Chi_12 = Chimu_12-Chimu_32;
|
||||
|
||||
// fspin(0)=hspin(0);
|
||||
// fspin(1)=hspin(1);
|
||||
// fspin(2)=timesMinusI(hspin(1));
|
||||
// fspin(3)=timesMinusI(hspin(0));
|
||||
#define XP_RECON\
|
||||
result_00 = UChi_00;\
|
||||
result_01 = UChi_01;\
|
||||
result_02 = UChi_02;\
|
||||
result_10 = UChi_10;\
|
||||
result_11 = UChi_11;\
|
||||
result_12 = UChi_12;\
|
||||
result_20 = timesMinusI(UChi_10);\
|
||||
result_21 = timesMinusI(UChi_11);\
|
||||
result_22 = timesMinusI(UChi_12);\
|
||||
result_30 = timesMinusI(UChi_00);\
|
||||
result_31 = timesMinusI(UChi_01);\
|
||||
result_32 = timesMinusI(UChi_02);
|
||||
|
||||
#define XP_RECON_ACCUM\
|
||||
result_00+=UChi_00;\
|
||||
result_01+=UChi_01;\
|
||||
result_02+=UChi_02;\
|
||||
result_10+=UChi_10;\
|
||||
result_11+=UChi_11;\
|
||||
result_12+=UChi_12;\
|
||||
result_20-=timesI(UChi_10);\
|
||||
result_21-=timesI(UChi_11);\
|
||||
result_22-=timesI(UChi_12);\
|
||||
result_30-=timesI(UChi_00);\
|
||||
result_31-=timesI(UChi_01);\
|
||||
result_32-=timesI(UChi_02);
|
||||
|
||||
#define XM_RECON\
|
||||
result_00 = UChi_00;\
|
||||
result_01 = UChi_01;\
|
||||
result_02 = UChi_02;\
|
||||
result_10 = UChi_10;\
|
||||
result_11 = UChi_11;\
|
||||
result_12 = UChi_12;\
|
||||
result_20 = timesI(UChi_10);\
|
||||
result_21 = timesI(UChi_11);\
|
||||
result_22 = timesI(UChi_12);\
|
||||
result_30 = timesI(UChi_00);\
|
||||
result_31 = timesI(UChi_01);\
|
||||
result_32 = timesI(UChi_02);
|
||||
|
||||
#define XM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= timesI(UChi_10);\
|
||||
result_21+= timesI(UChi_11);\
|
||||
result_22+= timesI(UChi_12);\
|
||||
result_30+= timesI(UChi_00);\
|
||||
result_31+= timesI(UChi_01);\
|
||||
result_32+= timesI(UChi_02);
|
||||
|
||||
#define YP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= UChi_10;\
|
||||
result_21+= UChi_11;\
|
||||
result_22+= UChi_12;\
|
||||
result_30-= UChi_00;\
|
||||
result_31-= UChi_01;\
|
||||
result_32-= UChi_02;
|
||||
|
||||
#define YM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= UChi_10;\
|
||||
result_21-= UChi_11;\
|
||||
result_22-= UChi_12;\
|
||||
result_30+= UChi_00;\
|
||||
result_31+= UChi_01;\
|
||||
result_32+= UChi_02;
|
||||
|
||||
#define ZP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= timesI(UChi_00); \
|
||||
result_21-= timesI(UChi_01); \
|
||||
result_22-= timesI(UChi_02); \
|
||||
result_30+= timesI(UChi_10); \
|
||||
result_31+= timesI(UChi_11); \
|
||||
result_32+= timesI(UChi_12);
|
||||
|
||||
#define ZM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= timesI(UChi_00); \
|
||||
result_21+= timesI(UChi_01); \
|
||||
result_22+= timesI(UChi_02); \
|
||||
result_30-= timesI(UChi_10); \
|
||||
result_31-= timesI(UChi_11); \
|
||||
result_32-= timesI(UChi_12);
|
||||
|
||||
#define TP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= UChi_00; \
|
||||
result_21+= UChi_01; \
|
||||
result_22+= UChi_02; \
|
||||
result_30+= UChi_10; \
|
||||
result_31+= UChi_11; \
|
||||
result_32+= UChi_12;
|
||||
|
||||
#define TM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= UChi_00; \
|
||||
result_21-= UChi_01; \
|
||||
result_22-= UChi_02; \
|
||||
result_30-= UChi_10; \
|
||||
result_31-= UChi_11; \
|
||||
result_32-= UChi_12;
|
||||
|
||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU; \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else { \
|
||||
LOAD_CHI; \
|
||||
} \
|
||||
MULT_2SPIN(DIR); \
|
||||
RECON;
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU; \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else if ( st.same_node[DIR] ) { \
|
||||
LOAD_CHI; \
|
||||
} \
|
||||
if (local || st.same_node[DIR] ) { \
|
||||
MULT_2SPIN(DIR); \
|
||||
RECON; \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
||||
LOAD_CHI; \
|
||||
MULT_2SPIN(DIR); \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
|
||||
#define HAND_RESULT(ss) \
|
||||
{ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
vstream(ref()(0)(0),result_00); \
|
||||
vstream(ref()(0)(1),result_01); \
|
||||
vstream(ref()(0)(2),result_02); \
|
||||
vstream(ref()(1)(0),result_10); \
|
||||
vstream(ref()(1)(1),result_11); \
|
||||
vstream(ref()(1)(2),result_12); \
|
||||
vstream(ref()(2)(0),result_20); \
|
||||
vstream(ref()(2)(1),result_21); \
|
||||
vstream(ref()(2)(2),result_22); \
|
||||
vstream(ref()(3)(0),result_30); \
|
||||
vstream(ref()(3)(1),result_31); \
|
||||
vstream(ref()(3)(2),result_32); \
|
||||
}
|
||||
|
||||
#define HAND_RESULT_EXT(ss) \
|
||||
if (nmu){ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
ref()(0)(0)+=result_00; \
|
||||
ref()(0)(1)+=result_01; \
|
||||
ref()(0)(2)+=result_02; \
|
||||
ref()(1)(0)+=result_10; \
|
||||
ref()(1)(1)+=result_11; \
|
||||
ref()(1)(2)+=result_12; \
|
||||
ref()(2)(0)+=result_20; \
|
||||
ref()(2)(1)+=result_21; \
|
||||
ref()(2)(2)+=result_22; \
|
||||
ref()(3)(0)+=result_30; \
|
||||
ref()(3)(1)+=result_31; \
|
||||
ref()(3)(2)+=result_32; \
|
||||
}
|
||||
|
||||
|
||||
#define HAND_DECLARATIONS(a) \
|
||||
Simd result_00; \
|
||||
Simd result_01; \
|
||||
Simd result_02; \
|
||||
Simd result_10; \
|
||||
Simd result_11; \
|
||||
Simd result_12; \
|
||||
Simd result_20; \
|
||||
Simd result_21; \
|
||||
Simd result_22; \
|
||||
Simd result_30; \
|
||||
Simd result_31; \
|
||||
Simd result_32; \
|
||||
Simd Chi_00; \
|
||||
Simd Chi_01; \
|
||||
Simd Chi_02; \
|
||||
Simd Chi_10; \
|
||||
Simd Chi_11; \
|
||||
Simd Chi_12; \
|
||||
Simd UChi_00; \
|
||||
Simd UChi_01; \
|
||||
Simd UChi_02; \
|
||||
Simd UChi_10; \
|
||||
Simd UChi_11; \
|
||||
Simd UChi_12; \
|
||||
Simd U_00; \
|
||||
Simd U_10; \
|
||||
Simd U_20; \
|
||||
Simd U_01; \
|
||||
Simd U_11; \
|
||||
Simd U_21;
|
||||
|
||||
#define ZERO_RESULT \
|
||||
result_00=zero; \
|
||||
result_01=zero; \
|
||||
result_02=zero; \
|
||||
result_10=zero; \
|
||||
result_11=zero; \
|
||||
result_12=zero; \
|
||||
result_20=zero; \
|
||||
result_21=zero; \
|
||||
result_22=zero; \
|
||||
result_30=zero; \
|
||||
result_31=zero; \
|
||||
result_32=zero;
|
||||
|
||||
#define Chimu_00 Chi_00
|
||||
#define Chimu_01 Chi_01
|
||||
#define Chimu_02 Chi_02
|
||||
#define Chimu_10 Chi_10
|
||||
#define Chimu_11 Chi_11
|
||||
#define Chimu_12 Chi_12
|
||||
#define Chimu_20 UChi_00
|
||||
#define Chimu_21 UChi_01
|
||||
#define Chimu_22 UChi_02
|
||||
#define Chimu_30 UChi_10
|
||||
#define Chimu_31 UChi_11
|
||||
#define Chimu_32 UChi_12
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
int nmu=0;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT_EXT(ss);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
int nmu=0;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT_EXT(ss);
|
||||
}
|
||||
|
||||
////////////// Wilson ; uses this implementation /////////////////////
|
||||
|
||||
#define INSTANTIATE_THEM(A) \
|
||||
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
INSTANTIATE_THEM(WilsonImplF);
|
||||
INSTANTIATE_THEM(WilsonImplD);
|
||||
INSTANTIATE_THEM(ZWilsonImplF);
|
||||
INSTANTIATE_THEM(ZWilsonImplD);
|
||||
INSTANTIATE_THEM(DomainWallVec5dImplF);
|
||||
INSTANTIATE_THEM(DomainWallVec5dImplD);
|
||||
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
|
||||
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
|
||||
INSTANTIATE_THEM(WilsonImplFH);
|
||||
INSTANTIATE_THEM(WilsonImplDF);
|
||||
INSTANTIATE_THEM(ZWilsonImplFH);
|
||||
INSTANTIATE_THEM(ZWilsonImplDF);
|
||||
INSTANTIATE_THEM(DomainWallVec5dImplFH);
|
||||
INSTANTIATE_THEM(DomainWallVec5dImplDF);
|
||||
INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
|
||||
INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
|
||||
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF);
|
||||
INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD);
|
||||
|
||||
}}
|
@ -1,878 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernelsHand.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
#define REGISTER
|
||||
|
||||
#define LOAD_CHIMU_BODY(F) \
|
||||
Chimu_00=ref(F)(0)(0); \
|
||||
Chimu_01=ref(F)(0)(1); \
|
||||
Chimu_02=ref(F)(0)(2); \
|
||||
Chimu_10=ref(F)(1)(0); \
|
||||
Chimu_11=ref(F)(1)(1); \
|
||||
Chimu_12=ref(F)(1)(2); \
|
||||
Chimu_20=ref(F)(2)(0); \
|
||||
Chimu_21=ref(F)(2)(1); \
|
||||
Chimu_22=ref(F)(2)(2); \
|
||||
Chimu_30=ref(F)(3)(0); \
|
||||
Chimu_31=ref(F)(3)(1); \
|
||||
Chimu_32=ref(F)(3)(2)
|
||||
|
||||
#define LOAD_CHIMU(DIR,F,PERM) \
|
||||
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
|
||||
|
||||
#define LOAD_CHI_BODY(F) \
|
||||
Chi_00 = ref(F)(0)(0);\
|
||||
Chi_01 = ref(F)(0)(1);\
|
||||
Chi_02 = ref(F)(0)(2);\
|
||||
Chi_10 = ref(F)(1)(0);\
|
||||
Chi_11 = ref(F)(1)(1);\
|
||||
Chi_12 = ref(F)(1)(2)
|
||||
|
||||
#define LOAD_CHI(DIR,F,PERM) \
|
||||
{const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
|
||||
|
||||
|
||||
//G-parity implementations using in-place intrinsic ops
|
||||
|
||||
//1l 1h -> 1h 1l
|
||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
|
||||
//0h,1l -> 1l,0h
|
||||
//if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) )
|
||||
//Pulled fermion through forwards face, GPBC on upper component
|
||||
//Need 0= 0l 1h 1= 1l 0h
|
||||
//else if( (distance == -1 && !perm) || (distance == 1 && perm) )
|
||||
//Pulled fermion through backwards face, GPBC on lower component
|
||||
//Need 0= 1l 0h 1= 0l 1h
|
||||
|
||||
//1l 1h -> 1h 1l
|
||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
|
||||
#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
|
||||
permute##PERM(tmp1, ref(1)(S)(C)); \
|
||||
exchange##PERM(tmp2,tmp3, ref(0)(S)(C), tmp1); \
|
||||
INTO = tmp2;
|
||||
|
||||
//0l 0h -> 0h 0l
|
||||
//1l 1h, 0h 0l -> 1l 0h, 1h 0l
|
||||
#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
|
||||
permute##PERM(tmp1, ref(0)(S)(C)); \
|
||||
exchange##PERM(tmp2,tmp3, ref(1)(S)(C), tmp1); \
|
||||
INTO = tmp2;
|
||||
|
||||
|
||||
|
||||
|
||||
#define LOAD_CHI_SETUP(DIR,F) \
|
||||
g = F; \
|
||||
direction = st._directions[DIR]; \
|
||||
distance = st._distances[DIR]; \
|
||||
sl = st._grid->_simd_layout[direction]; \
|
||||
inplace_twist = 0; \
|
||||
if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \
|
||||
if(sl == 1){ \
|
||||
g = (F+1) % 2; \
|
||||
}else{ \
|
||||
inplace_twist = 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
|
||||
{ const SiteSpinor &ref(in._odata[offset]); \
|
||||
LOAD_CHI_SETUP(DIR,F); \
|
||||
if(!inplace_twist){ \
|
||||
LOAD_CHIMU_BODY(g); \
|
||||
}else{ \
|
||||
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
|
||||
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
|
||||
DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
|
||||
}else{ \
|
||||
DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
|
||||
{ const SiteHalfSpinor &ref(buf[offset]); \
|
||||
LOAD_CHI_SETUP(DIR,F); \
|
||||
if(!inplace_twist){ \
|
||||
LOAD_CHI_BODY(g); \
|
||||
}else{ \
|
||||
if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
|
||||
( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
|
||||
DO_TWIST_0L_1H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_0L_1H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
|
||||
DO_TWIST_0L_1H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
|
||||
DO_TWIST_0L_1H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_0L_1H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
}else{ \
|
||||
DO_TWIST_1L_0H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
|
||||
DO_TWIST_1L_0H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
|
||||
DO_TWIST_1L_0H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
|
||||
DO_TWIST_1L_0H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
|
||||
DO_TWIST_1L_0H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
|
||||
#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT_2SPIN_BODY \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00; \
|
||||
UChi_10 = U_00*Chi_10; \
|
||||
UChi_01 = U_10*Chi_00; \
|
||||
UChi_11 = U_10*Chi_10; \
|
||||
UChi_02 = U_20*Chi_00; \
|
||||
UChi_12 = U_20*Chi_10; \
|
||||
UChi_00+= U_01*Chi_01; \
|
||||
UChi_10+= U_01*Chi_11; \
|
||||
UChi_01+= U_11*Chi_01; \
|
||||
UChi_11+= U_11*Chi_11; \
|
||||
UChi_02+= U_21*Chi_01; \
|
||||
UChi_12+= U_21*Chi_11; \
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02; \
|
||||
UChi_10+= U_00*Chi_12; \
|
||||
UChi_01+= U_10*Chi_02; \
|
||||
UChi_11+= U_10*Chi_12; \
|
||||
UChi_02+= U_20*Chi_02; \
|
||||
UChi_12+= U_20*Chi_12
|
||||
|
||||
|
||||
#define MULT_2SPIN(A,F) \
|
||||
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
|
||||
|
||||
#define MULT_2SPIN_GPARITY(A,F) \
|
||||
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_00,Chi_00);\
|
||||
permute##dir(Chi_01,Chi_01);\
|
||||
permute##dir(Chi_02,Chi_02);\
|
||||
permute##dir(Chi_10,Chi_10);\
|
||||
permute##dir(Chi_11,Chi_11);\
|
||||
permute##dir(Chi_12,Chi_12);
|
||||
|
||||
// hspin(0)=fspin(0)+timesI(fspin(3));
|
||||
// hspin(1)=fspin(1)+timesI(fspin(2));
|
||||
#define XP_PROJ \
|
||||
Chi_00 = Chimu_00+timesI(Chimu_30);\
|
||||
Chi_01 = Chimu_01+timesI(Chimu_31);\
|
||||
Chi_02 = Chimu_02+timesI(Chimu_32);\
|
||||
Chi_10 = Chimu_10+timesI(Chimu_20);\
|
||||
Chi_11 = Chimu_11+timesI(Chimu_21);\
|
||||
Chi_12 = Chimu_12+timesI(Chimu_22);
|
||||
|
||||
#define YP_PROJ \
|
||||
Chi_00 = Chimu_00-Chimu_30;\
|
||||
Chi_01 = Chimu_01-Chimu_31;\
|
||||
Chi_02 = Chimu_02-Chimu_32;\
|
||||
Chi_10 = Chimu_10+Chimu_20;\
|
||||
Chi_11 = Chimu_11+Chimu_21;\
|
||||
Chi_12 = Chimu_12+Chimu_22;
|
||||
|
||||
#define ZP_PROJ \
|
||||
Chi_00 = Chimu_00+timesI(Chimu_20); \
|
||||
Chi_01 = Chimu_01+timesI(Chimu_21); \
|
||||
Chi_02 = Chimu_02+timesI(Chimu_22); \
|
||||
Chi_10 = Chimu_10-timesI(Chimu_30); \
|
||||
Chi_11 = Chimu_11-timesI(Chimu_31); \
|
||||
Chi_12 = Chimu_12-timesI(Chimu_32);
|
||||
|
||||
#define TP_PROJ \
|
||||
Chi_00 = Chimu_00+Chimu_20; \
|
||||
Chi_01 = Chimu_01+Chimu_21; \
|
||||
Chi_02 = Chimu_02+Chimu_22; \
|
||||
Chi_10 = Chimu_10+Chimu_30; \
|
||||
Chi_11 = Chimu_11+Chimu_31; \
|
||||
Chi_12 = Chimu_12+Chimu_32;
|
||||
|
||||
|
||||
// hspin(0)=fspin(0)-timesI(fspin(3));
|
||||
// hspin(1)=fspin(1)-timesI(fspin(2));
|
||||
#define XM_PROJ \
|
||||
Chi_00 = Chimu_00-timesI(Chimu_30);\
|
||||
Chi_01 = Chimu_01-timesI(Chimu_31);\
|
||||
Chi_02 = Chimu_02-timesI(Chimu_32);\
|
||||
Chi_10 = Chimu_10-timesI(Chimu_20);\
|
||||
Chi_11 = Chimu_11-timesI(Chimu_21);\
|
||||
Chi_12 = Chimu_12-timesI(Chimu_22);
|
||||
|
||||
#define YM_PROJ \
|
||||
Chi_00 = Chimu_00+Chimu_30;\
|
||||
Chi_01 = Chimu_01+Chimu_31;\
|
||||
Chi_02 = Chimu_02+Chimu_32;\
|
||||
Chi_10 = Chimu_10-Chimu_20;\
|
||||
Chi_11 = Chimu_11-Chimu_21;\
|
||||
Chi_12 = Chimu_12-Chimu_22;
|
||||
|
||||
#define ZM_PROJ \
|
||||
Chi_00 = Chimu_00-timesI(Chimu_20); \
|
||||
Chi_01 = Chimu_01-timesI(Chimu_21); \
|
||||
Chi_02 = Chimu_02-timesI(Chimu_22); \
|
||||
Chi_10 = Chimu_10+timesI(Chimu_30); \
|
||||
Chi_11 = Chimu_11+timesI(Chimu_31); \
|
||||
Chi_12 = Chimu_12+timesI(Chimu_32);
|
||||
|
||||
#define TM_PROJ \
|
||||
Chi_00 = Chimu_00-Chimu_20; \
|
||||
Chi_01 = Chimu_01-Chimu_21; \
|
||||
Chi_02 = Chimu_02-Chimu_22; \
|
||||
Chi_10 = Chimu_10-Chimu_30; \
|
||||
Chi_11 = Chimu_11-Chimu_31; \
|
||||
Chi_12 = Chimu_12-Chimu_32;
|
||||
|
||||
// fspin(0)=hspin(0);
|
||||
// fspin(1)=hspin(1);
|
||||
// fspin(2)=timesMinusI(hspin(1));
|
||||
// fspin(3)=timesMinusI(hspin(0));
|
||||
#define XP_RECON\
|
||||
result_00 = UChi_00;\
|
||||
result_01 = UChi_01;\
|
||||
result_02 = UChi_02;\
|
||||
result_10 = UChi_10;\
|
||||
result_11 = UChi_11;\
|
||||
result_12 = UChi_12;\
|
||||
result_20 = timesMinusI(UChi_10);\
|
||||
result_21 = timesMinusI(UChi_11);\
|
||||
result_22 = timesMinusI(UChi_12);\
|
||||
result_30 = timesMinusI(UChi_00);\
|
||||
result_31 = timesMinusI(UChi_01);\
|
||||
result_32 = timesMinusI(UChi_02);
|
||||
|
||||
#define XP_RECON_ACCUM\
|
||||
result_00+=UChi_00;\
|
||||
result_01+=UChi_01;\
|
||||
result_02+=UChi_02;\
|
||||
result_10+=UChi_10;\
|
||||
result_11+=UChi_11;\
|
||||
result_12+=UChi_12;\
|
||||
result_20-=timesI(UChi_10);\
|
||||
result_21-=timesI(UChi_11);\
|
||||
result_22-=timesI(UChi_12);\
|
||||
result_30-=timesI(UChi_00);\
|
||||
result_31-=timesI(UChi_01);\
|
||||
result_32-=timesI(UChi_02);
|
||||
|
||||
#define XM_RECON\
|
||||
result_00 = UChi_00;\
|
||||
result_01 = UChi_01;\
|
||||
result_02 = UChi_02;\
|
||||
result_10 = UChi_10;\
|
||||
result_11 = UChi_11;\
|
||||
result_12 = UChi_12;\
|
||||
result_20 = timesI(UChi_10);\
|
||||
result_21 = timesI(UChi_11);\
|
||||
result_22 = timesI(UChi_12);\
|
||||
result_30 = timesI(UChi_00);\
|
||||
result_31 = timesI(UChi_01);\
|
||||
result_32 = timesI(UChi_02);
|
||||
|
||||
#define XM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= timesI(UChi_10);\
|
||||
result_21+= timesI(UChi_11);\
|
||||
result_22+= timesI(UChi_12);\
|
||||
result_30+= timesI(UChi_00);\
|
||||
result_31+= timesI(UChi_01);\
|
||||
result_32+= timesI(UChi_02);
|
||||
|
||||
#define YP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= UChi_10;\
|
||||
result_21+= UChi_11;\
|
||||
result_22+= UChi_12;\
|
||||
result_30-= UChi_00;\
|
||||
result_31-= UChi_01;\
|
||||
result_32-= UChi_02;
|
||||
|
||||
#define YM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= UChi_10;\
|
||||
result_21-= UChi_11;\
|
||||
result_22-= UChi_12;\
|
||||
result_30+= UChi_00;\
|
||||
result_31+= UChi_01;\
|
||||
result_32+= UChi_02;
|
||||
|
||||
#define ZP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= timesI(UChi_00); \
|
||||
result_21-= timesI(UChi_01); \
|
||||
result_22-= timesI(UChi_02); \
|
||||
result_30+= timesI(UChi_10); \
|
||||
result_31+= timesI(UChi_11); \
|
||||
result_32+= timesI(UChi_12);
|
||||
|
||||
#define ZM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= timesI(UChi_00); \
|
||||
result_21+= timesI(UChi_01); \
|
||||
result_22+= timesI(UChi_02); \
|
||||
result_30-= timesI(UChi_10); \
|
||||
result_31-= timesI(UChi_11); \
|
||||
result_32-= timesI(UChi_12);
|
||||
|
||||
#define TP_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20+= UChi_00; \
|
||||
result_21+= UChi_01; \
|
||||
result_22+= UChi_02; \
|
||||
result_30+= UChi_10; \
|
||||
result_31+= UChi_11; \
|
||||
result_32+= UChi_12;
|
||||
|
||||
#define TM_RECON_ACCUM\
|
||||
result_00+= UChi_00;\
|
||||
result_01+= UChi_01;\
|
||||
result_02+= UChi_02;\
|
||||
result_10+= UChi_10;\
|
||||
result_11+= UChi_11;\
|
||||
result_12+= UChi_12;\
|
||||
result_20-= UChi_00; \
|
||||
result_21-= UChi_01; \
|
||||
result_22-= UChi_02; \
|
||||
result_30-= UChi_10; \
|
||||
result_31-= UChi_11; \
|
||||
result_32-= UChi_12;
|
||||
|
||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU_IMPL(DIR,F,PERM); \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else { \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
} \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON;
|
||||
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU_IMPL(DIR,F,PERM); \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else if ( st.same_node[DIR] ) { \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
} \
|
||||
if (local || st.same_node[DIR] ) { \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON; \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
||||
LOAD_CHI_IMPL(DIR,F,PERM); \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
|
||||
#define HAND_RESULT(ss,F) \
|
||||
{ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
vstream(ref(F)(0)(0),result_00); \
|
||||
vstream(ref(F)(0)(1),result_01); \
|
||||
vstream(ref(F)(0)(2),result_02); \
|
||||
vstream(ref(F)(1)(0),result_10); \
|
||||
vstream(ref(F)(1)(1),result_11); \
|
||||
vstream(ref(F)(1)(2),result_12); \
|
||||
vstream(ref(F)(2)(0),result_20); \
|
||||
vstream(ref(F)(2)(1),result_21); \
|
||||
vstream(ref(F)(2)(2),result_22); \
|
||||
vstream(ref(F)(3)(0),result_30); \
|
||||
vstream(ref(F)(3)(1),result_31); \
|
||||
vstream(ref(F)(3)(2),result_32); \
|
||||
}
|
||||
|
||||
#define HAND_RESULT_EXT(ss,F) \
|
||||
if (nmu){ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
ref(F)(0)(0)+=result_00; \
|
||||
ref(F)(0)(1)+=result_01; \
|
||||
ref(F)(0)(2)+=result_02; \
|
||||
ref(F)(1)(0)+=result_10; \
|
||||
ref(F)(1)(1)+=result_11; \
|
||||
ref(F)(1)(2)+=result_12; \
|
||||
ref(F)(2)(0)+=result_20; \
|
||||
ref(F)(2)(1)+=result_21; \
|
||||
ref(F)(2)(2)+=result_22; \
|
||||
ref(F)(3)(0)+=result_30; \
|
||||
ref(F)(3)(1)+=result_31; \
|
||||
ref(F)(3)(2)+=result_32; \
|
||||
}
|
||||
|
||||
|
||||
#define HAND_DECLARATIONS(a) \
|
||||
Simd result_00; \
|
||||
Simd result_01; \
|
||||
Simd result_02; \
|
||||
Simd result_10; \
|
||||
Simd result_11; \
|
||||
Simd result_12; \
|
||||
Simd result_20; \
|
||||
Simd result_21; \
|
||||
Simd result_22; \
|
||||
Simd result_30; \
|
||||
Simd result_31; \
|
||||
Simd result_32; \
|
||||
Simd Chi_00; \
|
||||
Simd Chi_01; \
|
||||
Simd Chi_02; \
|
||||
Simd Chi_10; \
|
||||
Simd Chi_11; \
|
||||
Simd Chi_12; \
|
||||
Simd UChi_00; \
|
||||
Simd UChi_01; \
|
||||
Simd UChi_02; \
|
||||
Simd UChi_10; \
|
||||
Simd UChi_11; \
|
||||
Simd UChi_12; \
|
||||
Simd U_00; \
|
||||
Simd U_10; \
|
||||
Simd U_20; \
|
||||
Simd U_01; \
|
||||
Simd U_11; \
|
||||
Simd U_21;
|
||||
|
||||
#define ZERO_RESULT \
|
||||
result_00=zero; \
|
||||
result_01=zero; \
|
||||
result_02=zero; \
|
||||
result_10=zero; \
|
||||
result_11=zero; \
|
||||
result_12=zero; \
|
||||
result_20=zero; \
|
||||
result_21=zero; \
|
||||
result_22=zero; \
|
||||
result_30=zero; \
|
||||
result_31=zero; \
|
||||
result_32=zero;
|
||||
|
||||
#define Chimu_00 Chi_00
|
||||
#define Chimu_01 Chi_01
|
||||
#define Chimu_02 Chi_02
|
||||
#define Chimu_10 Chi_10
|
||||
#define Chimu_11 Chi_11
|
||||
#define Chimu_12 Chi_12
|
||||
#define Chimu_20 UChi_00
|
||||
#define Chimu_21 UChi_01
|
||||
#define Chimu_22 UChi_02
|
||||
#define Chimu_30 UChi_10
|
||||
#define Chimu_31 UChi_11
|
||||
#define Chimu_32 UChi_12
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
int nmu=0;
|
||||
|
||||
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
int nmu=0;
|
||||
|
||||
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
#define HAND_SPECIALISE_GPARITY(IMPL) \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
StencilEntry *SE; \
|
||||
int nmu=0; \
|
||||
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
typedef typename Simd::vector_type V; \
|
||||
\
|
||||
HAND_DECLARATIONS(ignore); \
|
||||
\
|
||||
StencilEntry *SE; \
|
||||
int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
|
||||
int nmu=0; \
|
||||
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
nmu = 0; \
|
||||
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
}
|
||||
|
||||
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
|
||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////// Wilson ; uses this implementation /////////////////////
|
||||
|
||||
#define INSTANTIATE_THEM(A) \
|
||||
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
INSTANTIATE_THEM(GparityWilsonImplF);
|
||||
INSTANTIATE_THEM(GparityWilsonImplD);
|
||||
INSTANTIATE_THEM(GparityWilsonImplFH);
|
||||
INSTANTIATE_THEM(GparityWilsonImplDF);
|
||||
}}
|
@ -1,155 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonTMFermion5D.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk> ; NB Christoph did similar in GPT
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/WilsonFermion.h>
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
namespace QCD {
|
||||
|
||||
template<class Impl>
|
||||
class WilsonTMFermion5D : public WilsonFermion5D<Impl>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
public:
|
||||
|
||||
virtual void Instantiatable(void) {};
|
||||
|
||||
// Constructors
|
||||
WilsonTMFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Frbgrid,
|
||||
GridCartesian &Ugrid,
|
||||
GridRedBlackCartesian &Urbgrid,
|
||||
const std::vector<RealD> _mass,
|
||||
const std::vector<RealD> _mu,
|
||||
const ImplParams &p= ImplParams()
|
||||
) :
|
||||
WilsonFermion5D<Impl>(_Umu,
|
||||
Fgrid,
|
||||
Frbgrid,
|
||||
Ugrid,
|
||||
Urbgrid,
|
||||
4.0,p)
|
||||
|
||||
{
|
||||
update(_mass,_mu);
|
||||
}
|
||||
|
||||
virtual void Meooe(const FermionField &in, FermionField &out) {
|
||||
if (in.checkerboard == Odd) {
|
||||
this->DhopEO(in, out, DaggerNo);
|
||||
} else {
|
||||
this->DhopOE(in, out, DaggerNo);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void MeooeDag(const FermionField &in, FermionField &out) {
|
||||
if (in.checkerboard == Odd) {
|
||||
this->DhopEO(in, out, DaggerYes);
|
||||
} else {
|
||||
this->DhopOE(in, out, DaggerYes);
|
||||
}
|
||||
}
|
||||
|
||||
// allow override for twisted mass and clover
|
||||
virtual void Mooee(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
//axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
ComplexD b(0.0,this->mu[s]);
|
||||
axpbg5y_ssp(out,a,in,b,in,s,s);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void MooeeDag(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
ComplexD b(0.0,-this->mu[s]);
|
||||
axpbg5y_ssp(out,a,in,b,in,s,s);
|
||||
}
|
||||
}
|
||||
virtual void MooeeInv(const FermionField &in, FermionField &out) {
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
RealD m = this->mass[s];
|
||||
RealD tm = this->mu[s];
|
||||
RealD mtil = 4.0+this->mass[s];
|
||||
RealD sq = mtil*mtil+tm*tm;
|
||||
ComplexD a = mtil/sq;
|
||||
ComplexD b(0.0, -tm /sq);
|
||||
axpbg5y_ssp(out,a,in,b,in,s,s);
|
||||
}
|
||||
}
|
||||
virtual void MooeeInvDag(const FermionField &in, FermionField &out) {
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
RealD m = this->mass[s];
|
||||
RealD tm = this->mu[s];
|
||||
RealD mtil = 4.0+this->mass[s];
|
||||
RealD sq = mtil*mtil+tm*tm;
|
||||
ComplexD a = mtil/sq;
|
||||
ComplexD b(0.0,tm /sq);
|
||||
axpbg5y_ssp(out,a,in,b,in,s,s);
|
||||
}
|
||||
}
|
||||
|
||||
virtual RealD M(const FermionField &in, FermionField &out) {
|
||||
out.checkerboard = in.checkerboard;
|
||||
this->Dhop(in, out, DaggerNo);
|
||||
FermionField tmp(out._grid);
|
||||
for (int s=0;s<(int)this->mass.size();s++) {
|
||||
ComplexD a = 4.0+this->mass[s];
|
||||
ComplexD b(0.0,this->mu[s]);
|
||||
axpbg5y_ssp(tmp,a,in,b,in,s,s);
|
||||
}
|
||||
return axpy_norm(out, 1.0, tmp, out);
|
||||
}
|
||||
|
||||
// needed for fast PV
|
||||
void update(const std::vector<RealD>& _mass, const std::vector<RealD>& _mu) {
|
||||
assert(_mass.size() == _mu.size());
|
||||
assert(_mass.size() == this->FermionGrid()->_fdimensions[0]);
|
||||
this->mass = _mass;
|
||||
this->mu = _mu;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<RealD> mu;
|
||||
std::vector<RealD> mass;
|
||||
|
||||
};
|
||||
|
||||
typedef WilsonTMFermion5D<WilsonImplF> WilsonTMFermion5DF;
|
||||
typedef WilsonTMFermion5D<WilsonImplD> WilsonTMFermion5DD;
|
||||
|
||||
}}
|
@ -1,153 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/GaugeImpl.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_GAUGE_IMPL_TYPES_H
|
||||
#define GRID_GAUGE_IMPL_TYPES_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Implementation dependent gauge types
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define INHERIT_GIMPL_TYPES(GImpl) \
|
||||
typedef typename GImpl::Simd Simd; \
|
||||
typedef typename GImpl::LinkField GaugeLinkField; \
|
||||
typedef typename GImpl::Field GaugeField; \
|
||||
typedef typename GImpl::ComplexField ComplexField;\
|
||||
typedef typename GImpl::SiteField SiteGaugeField; \
|
||||
typedef typename GImpl::SiteComplex SiteComplex; \
|
||||
typedef typename GImpl::SiteLink SiteGaugeLink;
|
||||
|
||||
#define INHERIT_FIELD_TYPES(Impl) \
|
||||
typedef typename Impl::Simd Simd; \
|
||||
typedef typename Impl::ComplexField ComplexField; \
|
||||
typedef typename Impl::SiteField SiteField; \
|
||||
typedef typename Impl::Field Field;
|
||||
|
||||
// hardcodes the exponential approximation in the template
|
||||
template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes {
|
||||
public:
|
||||
typedef S Simd;
|
||||
|
||||
template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype> > >;
|
||||
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
||||
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
|
||||
|
||||
typedef iImplScalar<Simd> SiteComplex;
|
||||
typedef iImplGaugeLink<Simd> SiteLink;
|
||||
typedef iImplGaugeField<Simd> SiteField;
|
||||
|
||||
typedef Lattice<SiteComplex> ComplexField;
|
||||
typedef Lattice<SiteLink> LinkField;
|
||||
typedef Lattice<SiteField> Field;
|
||||
|
||||
// Guido: we can probably separate the types from the HMC functions
|
||||
// this will create 2 kind of implementations
|
||||
// probably confusing the users
|
||||
// Now keeping only one class
|
||||
|
||||
|
||||
// Move this elsewhere? FIXME
|
||||
static inline void AddLink(Field &U, LinkField &W,
|
||||
int mu) { // U[mu] += W
|
||||
PARALLEL_FOR_LOOP
|
||||
for (auto ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
U._odata[ss]._internal[mu] =
|
||||
U._odata[ss]._internal[mu] + W._odata[ss]._internal;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Move these to another class
|
||||
// HMC auxiliary functions
|
||||
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) {
|
||||
// specific for SU gauge fields
|
||||
LinkField Pmu(P._grid);
|
||||
Pmu = zero;
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
|
||||
PokeIndex<LorentzIndex>(P, Pmu, mu);
|
||||
}
|
||||
}
|
||||
|
||||
static inline Field projectForce(Field &P) { return Ta(P); }
|
||||
|
||||
static inline void update_field(Field& P, Field& U, double ep){
|
||||
//static std::chrono::duration<double> diff;
|
||||
|
||||
//auto start = std::chrono::high_resolution_clock::now();
|
||||
parallel_for(int ss=0;ss<P._grid->oSites();ss++){
|
||||
for (int mu = 0; mu < Nd; mu++)
|
||||
U[ss]._internal[mu] = ProjectOnGroup(Exponentiate(P[ss]._internal[mu], ep, Nexp) * U[ss]._internal[mu]);
|
||||
}
|
||||
|
||||
//auto end = std::chrono::high_resolution_clock::now();
|
||||
// diff += end - start;
|
||||
// std::cout << "Time to exponentiate matrix " << diff.count() << " s\n";
|
||||
}
|
||||
|
||||
static inline RealD FieldSquareNorm(Field& U){
|
||||
LatticeComplex Hloc(U._grid);
|
||||
Hloc = zero;
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
auto Umu = PeekIndex<LorentzIndex>(U, mu);
|
||||
Hloc += trace(Umu * Umu);
|
||||
}
|
||||
Complex Hsum = sum(Hloc);
|
||||
return Hsum.real();
|
||||
}
|
||||
|
||||
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
SU<Nc>::HotConfiguration(pRNG, U);
|
||||
}
|
||||
|
||||
static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
SU<Nc>::TepidConfiguration(pRNG, U);
|
||||
}
|
||||
|
||||
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
SU<Nc>::ColdConfiguration(pRNG, U);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
|
||||
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
|
||||
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
|
||||
|
||||
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
|
||||
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
|
||||
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
|
||||
|
||||
|
||||
} // QCD
|
||||
} // Grid
|
||||
|
||||
#endif // GRID_GAUGE_IMPL_TYPES_H
|
@ -1,417 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/Photon.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef QCD_PHOTON_ACTION_H
|
||||
#define QCD_PHOTON_ACTION_H
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
template <class S>
|
||||
class QedGimpl
|
||||
{
|
||||
public:
|
||||
typedef S Simd;
|
||||
|
||||
template <typename vtype>
|
||||
using iImplGaugeLink = iScalar<iScalar<iScalar<vtype>>>;
|
||||
template <typename vtype>
|
||||
using iImplGaugeField = iVector<iScalar<iScalar<vtype>>, Nd>;
|
||||
|
||||
typedef iImplGaugeLink<Simd> SiteLink;
|
||||
typedef iImplGaugeField<Simd> SiteField;
|
||||
typedef SiteField SiteComplex;
|
||||
|
||||
typedef Lattice<SiteLink> LinkField;
|
||||
typedef Lattice<SiteField> Field;
|
||||
typedef Field ComplexField;
|
||||
};
|
||||
|
||||
typedef QedGimpl<vComplex> QedGimplR;
|
||||
|
||||
template<class Gimpl>
|
||||
class Photon
|
||||
{
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3);
|
||||
GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2, qedInf, 3);
|
||||
public:
|
||||
Photon(Gauge gauge, ZmScheme zmScheme);
|
||||
Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements);
|
||||
Photon(Gauge gauge, ZmScheme zmScheme, Real G0);
|
||||
Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements, Real G0);
|
||||
virtual ~Photon(void) = default;
|
||||
void FreePropagator(const GaugeField &in, GaugeField &out);
|
||||
void MomentumSpacePropagator(const GaugeField &in, GaugeField &out);
|
||||
void StochasticWeight(GaugeLinkField &weight);
|
||||
void StochasticField(GaugeField &out, GridParallelRNG &rng);
|
||||
void StochasticField(GaugeField &out, GridParallelRNG &rng,
|
||||
const GaugeLinkField &weight);
|
||||
void UnitField(GaugeField &out);
|
||||
private:
|
||||
void infVolPropagator(GaugeLinkField &out);
|
||||
void invKHatSquared(GaugeLinkField &out);
|
||||
void zmSub(GaugeLinkField &out);
|
||||
private:
|
||||
Gauge gauge_;
|
||||
ZmScheme zmScheme_;
|
||||
std::vector<Real> improvement_;
|
||||
Real G0_;
|
||||
};
|
||||
|
||||
typedef Photon<QedGimplR> PhotonR;
|
||||
|
||||
template<class Gimpl>
|
||||
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme)
|
||||
: gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()),
|
||||
G0_(0.15493339023106021408483720810737508876916113364521)
|
||||
{}
|
||||
|
||||
template<class Gimpl>
|
||||
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
|
||||
std::vector<Real> improvements)
|
||||
: gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements),
|
||||
G0_(0.15493339023106021408483720810737508876916113364521)
|
||||
{}
|
||||
|
||||
template<class Gimpl>
|
||||
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, Real G0)
|
||||
: gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()), G0_(G0)
|
||||
{}
|
||||
|
||||
template<class Gimpl>
|
||||
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
|
||||
std::vector<Real> improvements, Real G0)
|
||||
: gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements), G0_(G0)
|
||||
{}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out)
|
||||
{
|
||||
FFT theFFT(in._grid);
|
||||
|
||||
GaugeField in_k(in._grid);
|
||||
GaugeField prop_k(in._grid);
|
||||
|
||||
theFFT.FFT_all_dim(in_k,in,FFT::forward);
|
||||
MomentumSpacePropagator(prop_k,in_k);
|
||||
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
|
||||
}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::infVolPropagator(GaugeLinkField &out)
|
||||
{
|
||||
auto *grid = dynamic_cast<GridCartesian *>(out._grid);
|
||||
LatticeReal xmu(grid);
|
||||
GaugeLinkField one(grid);
|
||||
const unsigned int nd = grid->_ndimension;
|
||||
std::vector<int> &l = grid->_fdimensions;
|
||||
std::vector<int> x0(nd,0);
|
||||
TComplex Tone = Complex(1.0,0.0);
|
||||
TComplex Tzero = Complex(G0_,0.0);
|
||||
FFT fft(grid);
|
||||
|
||||
one = Complex(1.0,0.0);
|
||||
out = zero;
|
||||
for(int mu = 0; mu < nd; mu++)
|
||||
{
|
||||
LatticeCoordinate(xmu,mu);
|
||||
Real lo2 = l[mu]/2.0;
|
||||
xmu = where(xmu < lo2, xmu, xmu-double(l[mu]));
|
||||
out = out + toComplex(4*M_PI*M_PI*xmu*xmu);
|
||||
}
|
||||
pokeSite(Tone, out, x0);
|
||||
out = one/out;
|
||||
pokeSite(Tzero, out, x0);
|
||||
fft.FFT_all_dim(out, out, FFT::forward);
|
||||
}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out)
|
||||
{
|
||||
GridBase *grid = out._grid;
|
||||
GaugeLinkField kmu(grid), one(grid);
|
||||
const unsigned int nd = grid->_ndimension;
|
||||
std::vector<int> &l = grid->_fdimensions;
|
||||
std::vector<int> zm(nd,0);
|
||||
TComplex Tone = Complex(1.0,0.0);
|
||||
TComplex Tzero= Complex(0.0,0.0);
|
||||
|
||||
one = Complex(1.0,0.0);
|
||||
out = zero;
|
||||
for(int mu = 0; mu < nd; mu++)
|
||||
{
|
||||
Real twoPiL = M_PI*2./l[mu];
|
||||
|
||||
LatticeCoordinate(kmu,mu);
|
||||
kmu = 2.*sin(.5*twoPiL*kmu);
|
||||
out = out + kmu*kmu;
|
||||
}
|
||||
pokeSite(Tone, out, zm);
|
||||
out = one/out;
|
||||
pokeSite(Tzero, out, zm);
|
||||
}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::zmSub(GaugeLinkField &out)
|
||||
{
|
||||
GridBase *grid = out._grid;
|
||||
const unsigned int nd = grid->_ndimension;
|
||||
std::vector<int> &l = grid->_fdimensions;
|
||||
|
||||
switch (zmScheme_)
|
||||
{
|
||||
case ZmScheme::qedTL:
|
||||
{
|
||||
std::vector<int> zm(nd,0);
|
||||
TComplex Tzero = Complex(0.0,0.0);
|
||||
|
||||
pokeSite(Tzero, out, zm);
|
||||
|
||||
break;
|
||||
}
|
||||
case ZmScheme::qedL:
|
||||
{
|
||||
LatticeInteger spNrm(grid), coor(grid);
|
||||
GaugeLinkField z(grid);
|
||||
|
||||
spNrm = zero;
|
||||
for(int d = 0; d < grid->_ndimension - 1; d++)
|
||||
{
|
||||
LatticeCoordinate(coor,d);
|
||||
coor = where(coor < Integer(l[d]/2), coor, coor-Integer(l[d]));
|
||||
spNrm = spNrm + coor*coor;
|
||||
}
|
||||
out = where(spNrm == Integer(0), 0.*out, out);
|
||||
|
||||
// IR improvement
|
||||
for(int i = 0; i < improvement_.size(); i++)
|
||||
{
|
||||
Real f = sqrt(improvement_[i]+1);
|
||||
out = where(spNrm == Integer(i+1), f*out, out);
|
||||
}
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in,
|
||||
GaugeField &out)
|
||||
{
|
||||
GridBase *grid = out._grid;
|
||||
LatticeComplex momProp(grid);
|
||||
|
||||
switch (zmScheme_)
|
||||
{
|
||||
case ZmScheme::qedTL:
|
||||
case ZmScheme::qedL:
|
||||
{
|
||||
invKHatSquared(momProp);
|
||||
zmSub(momProp);
|
||||
break;
|
||||
}
|
||||
case ZmScheme::qedInf:
|
||||
{
|
||||
infVolPropagator(momProp);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
out = in*momProp;
|
||||
}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::StochasticWeight(GaugeLinkField &weight)
|
||||
{
|
||||
auto *grid = dynamic_cast<GridCartesian *>(weight._grid);
|
||||
const unsigned int nd = grid->_ndimension;
|
||||
std::vector<int> latt_size = grid->_fdimensions;
|
||||
|
||||
switch (zmScheme_)
|
||||
{
|
||||
case ZmScheme::qedTL:
|
||||
case ZmScheme::qedL:
|
||||
{
|
||||
Integer vol = 1;
|
||||
for(int d = 0; d < nd; d++)
|
||||
{
|
||||
vol = vol * latt_size[d];
|
||||
}
|
||||
invKHatSquared(weight);
|
||||
weight = sqrt(vol)*sqrt(weight);
|
||||
zmSub(weight);
|
||||
break;
|
||||
}
|
||||
case ZmScheme::qedInf:
|
||||
{
|
||||
infVolPropagator(weight);
|
||||
weight = sqrt(real(weight));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
|
||||
{
|
||||
auto *grid = dynamic_cast<GridCartesian *>(out._grid);
|
||||
GaugeLinkField weight(grid);
|
||||
|
||||
StochasticWeight(weight);
|
||||
StochasticField(out, rng, weight);
|
||||
}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng,
|
||||
const GaugeLinkField &weight)
|
||||
{
|
||||
auto *grid = dynamic_cast<GridCartesian *>(out._grid);
|
||||
const unsigned int nd = grid->_ndimension;
|
||||
GaugeLinkField r(grid);
|
||||
GaugeField aTilde(grid);
|
||||
FFT fft(grid);
|
||||
|
||||
switch (zmScheme_)
|
||||
{
|
||||
case ZmScheme::qedTL:
|
||||
case ZmScheme::qedL:
|
||||
{
|
||||
for(int mu = 0; mu < nd; mu++)
|
||||
{
|
||||
gaussian(rng, r);
|
||||
r = weight*r;
|
||||
pokeLorentz(aTilde, r, mu);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ZmScheme::qedInf:
|
||||
{
|
||||
Complex shift(1., 1.); // This needs to be a GaugeLink element?
|
||||
for(int mu = 0; mu < nd; mu++)
|
||||
{
|
||||
bernoulli(rng, r);
|
||||
r = weight*(2.*r - shift);
|
||||
pokeLorentz(aTilde, r, mu);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
fft.FFT_all_dim(out, aTilde, FFT::backward);
|
||||
|
||||
out = real(out);
|
||||
}
|
||||
|
||||
template<class Gimpl>
|
||||
void Photon<Gimpl>::UnitField(GaugeField &out)
|
||||
{
|
||||
auto *grid = dynamic_cast<GridCartesian *>(out._grid);
|
||||
const unsigned int nd = grid->_ndimension;
|
||||
GaugeLinkField r(grid);
|
||||
|
||||
r = Complex(1.0,0.0);
|
||||
|
||||
for(int mu = 0; mu < nd; mu++)
|
||||
{
|
||||
pokeLorentz(out, r, mu);
|
||||
}
|
||||
|
||||
out = real(out);
|
||||
}
|
||||
// template<class Gimpl>
|
||||
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out,
|
||||
// const GaugeField &in)
|
||||
// {
|
||||
//
|
||||
// FeynmanGaugeMomentumSpacePropagator_TL(out,in);
|
||||
//
|
||||
// GridBase *grid = out._grid;
|
||||
// LatticeInteger coor(grid);
|
||||
// GaugeField zz(grid); zz=zero;
|
||||
//
|
||||
// // xyzt
|
||||
// for(int d = 0; d < grid->_ndimension-1;d++){
|
||||
// LatticeCoordinate(coor,d);
|
||||
// out = where(coor==Integer(0),zz,out);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// template<class Gimpl>
|
||||
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out,
|
||||
// const GaugeField &in)
|
||||
// {
|
||||
//
|
||||
// // what type LatticeComplex
|
||||
// GridBase *grid = out._grid;
|
||||
// int nd = grid->_ndimension;
|
||||
//
|
||||
// typedef typename GaugeField::vector_type vector_type;
|
||||
// typedef typename GaugeField::scalar_type ScalComplex;
|
||||
// typedef Lattice<iSinglet<vector_type> > LatComplex;
|
||||
//
|
||||
// std::vector<int> latt_size = grid->_fdimensions;
|
||||
//
|
||||
// LatComplex denom(grid); denom= zero;
|
||||
// LatComplex one(grid); one = ScalComplex(1.0,0.0);
|
||||
// LatComplex kmu(grid);
|
||||
//
|
||||
// ScalComplex ci(0.0,1.0);
|
||||
// // momphase = n * 2pi / L
|
||||
// for(int mu=0;mu<Nd;mu++) {
|
||||
//
|
||||
// LatticeCoordinate(kmu,mu);
|
||||
//
|
||||
// RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
//
|
||||
// kmu = TwoPiL * kmu ;
|
||||
//
|
||||
// denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
|
||||
// }
|
||||
// std::vector<int> zero_mode(nd,0);
|
||||
// TComplexD Tone = ComplexD(1.0,0.0);
|
||||
// TComplexD Tzero= ComplexD(0.0,0.0);
|
||||
//
|
||||
// pokeSite(Tone,denom,zero_mode);
|
||||
//
|
||||
// denom= one/denom;
|
||||
//
|
||||
// pokeSite(Tzero,denom,zero_mode);
|
||||
//
|
||||
// out = zero;
|
||||
// out = in*denom;
|
||||
// };
|
||||
|
||||
}}
|
||||
#endif
|
@ -1,95 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef QCD_WILSON_GAUGE_ACTION_H
|
||||
#define QCD_WILSON_GAUGE_ACTION_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Wilson Gauge Action .. should I template the Nc etc..
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
template <class Gimpl>
|
||||
class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> {
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
/////////////////////////// constructors
|
||||
explicit WilsonGaugeAction(RealD beta_):beta(beta_){};
|
||||
|
||||
virtual std::string action_name() {return "WilsonGaugeAction";}
|
||||
|
||||
virtual std::string LogParameters(){
|
||||
std::stringstream sstream;
|
||||
sstream << GridLogMessage << "[WilsonGaugeAction] Beta: " << beta << std::endl;
|
||||
return sstream.str();
|
||||
}
|
||||
|
||||
virtual void refresh(const GaugeField &U,
|
||||
GridParallelRNG &pRNG){}; // noop as no pseudoferms
|
||||
|
||||
virtual RealD S(const GaugeField &U) {
|
||||
RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(U);
|
||||
RealD vol = U._grid->gSites();
|
||||
RealD action = beta * (1.0 - plaq) * (Nd * (Nd - 1.0)) * vol * 0.5;
|
||||
return action;
|
||||
};
|
||||
|
||||
virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
|
||||
// not optimal implementation FIXME
|
||||
// extend Ta to include Lorentz indexes
|
||||
|
||||
RealD factor = 0.5 * beta / RealD(Nc);
|
||||
|
||||
GaugeLinkField Umu(U._grid);
|
||||
GaugeLinkField dSdU_mu(U._grid);
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
Umu = PeekIndex<LorentzIndex>(U, mu);
|
||||
|
||||
// Staple in direction mu
|
||||
WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu);
|
||||
dSdU_mu = Ta(Umu * dSdU_mu) * factor;
|
||||
|
||||
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
|
||||
}
|
||||
}
|
||||
private:
|
||||
RealD beta;
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -1,145 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef QCD_EVEN_ODD_SCHUR_DIFFERENTIABLE_H
|
||||
#define QCD_EVEN_ODD_SCHUR_DIFFERENTIABLE_H
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
|
||||
// Base even odd HMC on the normal Mee based schur decomposition.
|
||||
//
|
||||
// M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
|
||||
// (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
|
||||
//
|
||||
// Determinant is det of middle factor
|
||||
// This assumes Mee is indept of U.
|
||||
//
|
||||
template<class Impl>
|
||||
class SchurDifferentiableOperator : public SchurDiagMooeeOperator<FermionOperator<Impl>,typename Impl::FermionField>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
typedef FermionOperator<Impl> Matrix;
|
||||
|
||||
SchurDifferentiableOperator (Matrix &Mat) : SchurDiagMooeeOperator<Matrix,FermionField>(Mat) {};
|
||||
|
||||
void MpcDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) {
|
||||
|
||||
GridBase *fgrid = this->_Mat.FermionGrid();
|
||||
GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid();
|
||||
|
||||
FermionField tmp1(fcbgrid);
|
||||
FermionField tmp2(fcbgrid);
|
||||
|
||||
conformable(fcbgrid,U._grid);
|
||||
conformable(fcbgrid,V._grid);
|
||||
|
||||
// Assert the checkerboard?? or code for either
|
||||
assert(U.checkerboard==Odd);
|
||||
assert(V.checkerboard==U.checkerboard);
|
||||
|
||||
// NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE
|
||||
// it is not conformable with the HMC force field
|
||||
// Case: Ls vectorised fields
|
||||
// INHERIT FROM THE Force field instead
|
||||
GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid);
|
||||
GaugeField ForceO(forcecb);
|
||||
GaugeField ForceE(forcecb);
|
||||
|
||||
|
||||
// X^dag Der_oe MeeInv Meo Y
|
||||
// Use Mooee as nontrivial but gauge field indept
|
||||
this->_Mat.Meooe (V,tmp1); // odd->even -- implicit -0.5 factor to be applied
|
||||
this->_Mat.MooeeInv(tmp1,tmp2); // even->even
|
||||
this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerNo);
|
||||
// Accumulate X^dag M_oe MeeInv Der_eo Y
|
||||
this->_Mat.MeooeDag (U,tmp1); // even->odd -- implicit -0.5 factor to be applied
|
||||
this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even
|
||||
this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerNo);
|
||||
|
||||
assert(ForceE.checkerboard==Even);
|
||||
assert(ForceO.checkerboard==Odd);
|
||||
|
||||
setCheckerboard(Force,ForceE);
|
||||
setCheckerboard(Force,ForceO);
|
||||
Force=-Force;
|
||||
|
||||
delete forcecb;
|
||||
}
|
||||
|
||||
|
||||
void MpcDagDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) {
|
||||
|
||||
GridBase *fgrid = this->_Mat.FermionGrid();
|
||||
GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid();
|
||||
|
||||
FermionField tmp1(fcbgrid);
|
||||
FermionField tmp2(fcbgrid);
|
||||
|
||||
conformable(fcbgrid,U._grid);
|
||||
conformable(fcbgrid,V._grid);
|
||||
|
||||
// Assert the checkerboard?? or code for either
|
||||
assert(V.checkerboard==Odd);
|
||||
assert(V.checkerboard==V.checkerboard);
|
||||
|
||||
// NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE
|
||||
// it is not conformable with the HMC force field
|
||||
// INHERIT FROM THE Force field instead
|
||||
GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid);
|
||||
GaugeField ForceO(forcecb);
|
||||
GaugeField ForceE(forcecb);
|
||||
|
||||
// X^dag Der_oe MeeInv Meo Y
|
||||
// Use Mooee as nontrivial but gauge field indept
|
||||
this->_Mat.MeooeDag (V,tmp1); // odd->even -- implicit -0.5 factor to be applied
|
||||
this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even
|
||||
this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerYes);
|
||||
|
||||
// Accumulate X^dag M_oe MeeInv Der_eo Y
|
||||
this->_Mat.Meooe (U,tmp1); // even->odd -- implicit -0.5 factor to be applied
|
||||
this->_Mat.MooeeInv(tmp1,tmp2); // even->even
|
||||
this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerYes);
|
||||
|
||||
assert(ForceE.checkerboard==Even);
|
||||
assert(ForceO.checkerboard==Odd);
|
||||
|
||||
setCheckerboard(Force,ForceE);
|
||||
setCheckerboard(Force,ForceO);
|
||||
Force=-Force;
|
||||
|
||||
delete forcecb;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
@ -1,264 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Implementation of exact one flavour algorithm (EOFA) //
|
||||
// using fermion classes defined in: //
|
||||
// Grid/qcd/action/fermion/DomainWallEOFAFermion.h (Shamir) //
|
||||
// Grid/qcd/action/fermion/MobiusEOFAFermion.h (Mobius) //
|
||||
// arXiv: 1403.1683, 1706.05843 //
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H
|
||||
#define QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Exact one flavour implementation of DWF determinant ratio //
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Impl>
|
||||
class ExactOneFlavourRatioPseudoFermionAction : public Action<typename Impl::GaugeField>
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef OneFlavourRationalParams Params;
|
||||
Params param;
|
||||
MultiShiftFunction PowerNegHalf;
|
||||
|
||||
private:
|
||||
bool use_heatbath_forecasting;
|
||||
AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
|
||||
AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
|
||||
SchurRedBlackDiagMooeeSolve<FermionField> Solver;
|
||||
FermionField Phi; // the pseudofermion field for this trajectory
|
||||
|
||||
public:
|
||||
ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop,
|
||||
OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), Solver(S),
|
||||
Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc)
|
||||
{
|
||||
AlgRemez remez(param.lo, param.hi, param.precision);
|
||||
|
||||
// MdagM^(+- 1/2)
|
||||
std::cout << GridLogMessage << "Generating degree " << param.degree << " for x^(-1/2)" << std::endl;
|
||||
remez.generateApprox(param.degree, 1, 2);
|
||||
PowerNegHalf.Init(remez, param.tolerance, true);
|
||||
};
|
||||
|
||||
virtual std::string action_name() { return "ExactOneFlavourRatioPseudoFermionAction"; }
|
||||
|
||||
virtual std::string LogParameters() {
|
||||
std::stringstream sstream;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Low :" << param.lo << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] High :" << param.hi << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Max iterations :" << param.MaxIter << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Tolerance :" << param.tolerance << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Degree :" << param.degree << std::endl;
|
||||
sstream << GridLogMessage << "[" << action_name() << "] Precision :" << param.precision << std::endl;
|
||||
return sstream.str();
|
||||
}
|
||||
|
||||
// Spin projection
|
||||
void spProj(const FermionField& in, FermionField& out, int sign, int Ls)
|
||||
{
|
||||
if(sign == 1){ for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(out, 0.0, in, 1.0, in, s, s); } }
|
||||
else{ for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(out, 0.0, in, 1.0, in, s, s); } }
|
||||
}
|
||||
|
||||
// EOFA heatbath: see Eqn. (29) of arXiv:1706.05843
|
||||
// We generate a Gaussian noise vector \eta, and then compute
|
||||
// \Phi = M_{\rm EOFA}^{-1/2} * \eta
|
||||
// using a rational approximation to the inverse square root
|
||||
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG)
|
||||
{
|
||||
Lop.ImportGauge(U);
|
||||
Rop.ImportGauge(U);
|
||||
|
||||
FermionField eta (Lop.FermionGrid());
|
||||
FermionField CG_src (Lop.FermionGrid());
|
||||
FermionField CG_soln (Lop.FermionGrid());
|
||||
FermionField Forecast_src(Lop.FermionGrid());
|
||||
std::vector<FermionField> tmp(2, Lop.FermionGrid());
|
||||
|
||||
// Use chronological inverter to forecast solutions across poles
|
||||
std::vector<FermionField> prev_solns;
|
||||
if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); }
|
||||
ChronoForecast<AbstractEOFAFermion<Impl>, FermionField> Forecast;
|
||||
|
||||
// Seed with Gaussian noise vector (var = 0.5)
|
||||
RealD scale = std::sqrt(0.5);
|
||||
gaussian(pRNG,eta);
|
||||
eta = eta * scale;
|
||||
printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta));
|
||||
|
||||
// \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
|
||||
RealD N(PowerNegHalf.norm);
|
||||
for(int k=0; k<param.degree; ++k){ N += PowerNegHalf.residues[k] / ( 1.0 + PowerNegHalf.poles[k] ); }
|
||||
Phi = eta * N;
|
||||
|
||||
// LH terms:
|
||||
// \Phi = \Phi + k \sum_{k=1}^{N_{p}} P_{-} \Omega_{-}^{\dagger} ( H(mf)
|
||||
// - \gamma_{l} \Delta_{-}(mf,mb) P_{-} )^{-1} \Omega_{-} P_{-} \eta
|
||||
RealD gamma_l(0.0);
|
||||
spProj(eta, tmp[0], -1, Lop.Ls);
|
||||
Lop.Omega(tmp[0], tmp[1], -1, 0);
|
||||
G5R5(CG_src, tmp[1]);
|
||||
tmp[1] = zero;
|
||||
for(int k=0; k<param.degree; ++k){
|
||||
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
|
||||
Lop.RefreshShiftCoefficients(-gamma_l);
|
||||
if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
|
||||
Lop.Mdag(CG_src, Forecast_src);
|
||||
CG_soln = Forecast(Lop, Forecast_src, prev_solns);
|
||||
Solver(Lop, CG_src, CG_soln);
|
||||
prev_solns.push_back(CG_soln);
|
||||
} else {
|
||||
CG_soln = zero; // Just use zero as the initial guess
|
||||
Solver(Lop, CG_src, CG_soln);
|
||||
}
|
||||
Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
|
||||
tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
|
||||
}
|
||||
Lop.Omega(tmp[1], tmp[0], -1, 1);
|
||||
spProj(tmp[0], tmp[1], -1, Lop.Ls);
|
||||
Phi = Phi + tmp[1];
|
||||
|
||||
// RH terms:
|
||||
// \Phi = \Phi - k \sum_{k=1}^{N_{p}} P_{+} \Omega_{+}^{\dagger} ( H(mb)
|
||||
// + \gamma_{l} \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \eta
|
||||
spProj(eta, tmp[0], 1, Rop.Ls);
|
||||
Rop.Omega(tmp[0], tmp[1], 1, 0);
|
||||
G5R5(CG_src, tmp[1]);
|
||||
tmp[1] = zero;
|
||||
if(use_heatbath_forecasting){ prev_solns.clear(); } // empirically, LH solns don't help for RH solves
|
||||
for(int k=0; k<param.degree; ++k){
|
||||
gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
|
||||
Rop.RefreshShiftCoefficients(-gamma_l*PowerNegHalf.poles[k]);
|
||||
if(use_heatbath_forecasting){
|
||||
Rop.Mdag(CG_src, Forecast_src);
|
||||
CG_soln = Forecast(Rop, Forecast_src, prev_solns);
|
||||
Solver(Rop, CG_src, CG_soln);
|
||||
prev_solns.push_back(CG_soln);
|
||||
} else {
|
||||
CG_soln = zero;
|
||||
Solver(Rop, CG_src, CG_soln);
|
||||
}
|
||||
Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
|
||||
tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
|
||||
}
|
||||
Rop.Omega(tmp[1], tmp[0], 1, 1);
|
||||
spProj(tmp[0], tmp[1], 1, Rop.Ls);
|
||||
Phi = Phi + tmp[1];
|
||||
|
||||
// Reset shift coefficients for energy and force evals
|
||||
Lop.RefreshShiftCoefficients(0.0);
|
||||
Rop.RefreshShiftCoefficients(-1.0);
|
||||
};
|
||||
|
||||
// EOFA action: see Eqn. (10) of arXiv:1706.05843
|
||||
virtual RealD S(const GaugeField& U)
|
||||
{
|
||||
Lop.ImportGauge(U);
|
||||
Rop.ImportGauge(U);
|
||||
|
||||
FermionField spProj_Phi(Lop.FermionGrid());
|
||||
std::vector<FermionField> tmp(2, Lop.FermionGrid());
|
||||
|
||||
// S = <\Phi|\Phi>
|
||||
RealD action(norm2(Phi));
|
||||
|
||||
// LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
|
||||
spProj(Phi, spProj_Phi, -1, Lop.Ls);
|
||||
Lop.Omega(spProj_Phi, tmp[0], -1, 0);
|
||||
G5R5(tmp[1], tmp[0]);
|
||||
tmp[0] = zero;
|
||||
Solver(Lop, tmp[1], tmp[0]);
|
||||
Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
|
||||
Lop.Omega(tmp[1], tmp[0], -1, 1);
|
||||
action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
|
||||
|
||||
// RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
|
||||
// - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
|
||||
spProj(Phi, spProj_Phi, 1, Rop.Ls);
|
||||
Rop.Omega(spProj_Phi, tmp[0], 1, 0);
|
||||
G5R5(tmp[1], tmp[0]);
|
||||
tmp[0] = zero;
|
||||
Solver(Rop, tmp[1], tmp[0]);
|
||||
Rop.Dtilde(tmp[0], tmp[1]);
|
||||
Rop.Omega(tmp[1], tmp[0], 1, 1);
|
||||
action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
|
||||
|
||||
return action;
|
||||
};
|
||||
|
||||
// EOFA pseudofermion force: see Eqns. (34)-(36) of arXiv:1706.05843
|
||||
virtual void deriv(const GaugeField& U, GaugeField& dSdU)
|
||||
{
|
||||
Lop.ImportGauge(U);
|
||||
Rop.ImportGauge(U);
|
||||
|
||||
FermionField spProj_Phi (Lop.FermionGrid());
|
||||
FermionField Omega_spProj_Phi(Lop.FermionGrid());
|
||||
FermionField CG_src (Lop.FermionGrid());
|
||||
FermionField Chi (Lop.FermionGrid());
|
||||
FermionField g5_R5_Chi (Lop.FermionGrid());
|
||||
|
||||
GaugeField force(Lop.GaugeGrid());
|
||||
|
||||
// LH: dSdU = k \chi_{L}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{L}
|
||||
// \chi_{L} = H(mf)^{-1} \Omega_{-} P_{-} \Phi
|
||||
spProj(Phi, spProj_Phi, -1, Lop.Ls);
|
||||
Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
|
||||
G5R5(CG_src, Omega_spProj_Phi);
|
||||
spProj_Phi = zero;
|
||||
Solver(Lop, CG_src, spProj_Phi);
|
||||
Lop.Dtilde(spProj_Phi, Chi);
|
||||
G5R5(g5_R5_Chi, Chi);
|
||||
Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
|
||||
dSdU = Lop.k * force;
|
||||
|
||||
// RH: dSdU = dSdU - k \chi_{R}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{}
|
||||
// \chi_{R} = ( H(mb) - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \Phi
|
||||
spProj(Phi, spProj_Phi, 1, Rop.Ls);
|
||||
Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
|
||||
G5R5(CG_src, Omega_spProj_Phi);
|
||||
spProj_Phi = zero;
|
||||
Solver(Rop, CG_src, spProj_Phi);
|
||||
Rop.Dtilde(spProj_Phi, Chi);
|
||||
G5R5(g5_R5_Chi, Chi);
|
||||
Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
|
||||
dSdU = dSdU - Rop.k * force;
|
||||
};
|
||||
};
|
||||
}}
|
||||
|
||||
#endif
|
@ -1,209 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_RATIO_H
|
||||
#define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_RATIO_H
|
||||
|
||||
namespace Grid{
|
||||
namespace QCD{
|
||||
|
||||
///////////////////////////////////////
|
||||
// Two flavour ratio
|
||||
///////////////////////////////////////
|
||||
template<class Impl>
|
||||
class TwoFlavourEvenOddRatioPseudoFermionAction : public Action<typename Impl::GaugeField> {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
|
||||
private:
|
||||
FermionOperator<Impl> & NumOp;// the basic operator
|
||||
FermionOperator<Impl> & DenOp;// the basic operator
|
||||
|
||||
OperatorFunction<FermionField> &DerivativeSolver;
|
||||
OperatorFunction<FermionField> &ActionSolver;
|
||||
|
||||
FermionField PhiOdd; // the pseudo fermion field for this trajectory
|
||||
FermionField PhiEven; // the pseudo fermion field for this trajectory
|
||||
|
||||
public:
|
||||
TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl> &_NumOp,
|
||||
FermionOperator<Impl> &_DenOp,
|
||||
OperatorFunction<FermionField> & DS,
|
||||
OperatorFunction<FermionField> & AS) :
|
||||
NumOp(_NumOp),
|
||||
DenOp(_DenOp),
|
||||
DerivativeSolver(DS),
|
||||
ActionSolver(AS),
|
||||
PhiEven(_NumOp.FermionRedBlackGrid()),
|
||||
PhiOdd(_NumOp.FermionRedBlackGrid())
|
||||
{
|
||||
conformable(_NumOp.FermionGrid(), _DenOp.FermionGrid());
|
||||
conformable(_NumOp.FermionRedBlackGrid(), _DenOp.FermionRedBlackGrid());
|
||||
conformable(_NumOp.GaugeGrid(), _DenOp.GaugeGrid());
|
||||
conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid());
|
||||
};
|
||||
|
||||
virtual std::string action_name(){return "TwoFlavourEvenOddRatioPseudoFermionAction";}
|
||||
|
||||
virtual std::string LogParameters(){
|
||||
std::stringstream sstream;
|
||||
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
|
||||
return sstream.str();
|
||||
}
|
||||
|
||||
|
||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
|
||||
|
||||
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
|
||||
//
|
||||
// NumOp == V
|
||||
// DenOp == M
|
||||
//
|
||||
// Take phi_o = Vpcdag^{-1} Mpcdag eta_o ; eta_o = Mpcdag^{-1} Vpcdag Phi
|
||||
//
|
||||
// P(eta_o) = e^{- eta_o^dag eta_o}
|
||||
//
|
||||
// e^{x^2/2 sig^2} => sig^2 = 0.5.
|
||||
//
|
||||
RealD scale = std::sqrt(0.5);
|
||||
|
||||
FermionField eta (NumOp.FermionGrid());
|
||||
FermionField etaOdd (NumOp.FermionRedBlackGrid());
|
||||
FermionField etaEven(NumOp.FermionRedBlackGrid());
|
||||
FermionField tmp (NumOp.FermionRedBlackGrid());
|
||||
|
||||
gaussian(pRNG,eta);
|
||||
|
||||
pickCheckerboard(Even,etaEven,eta);
|
||||
pickCheckerboard(Odd,etaOdd,eta);
|
||||
|
||||
NumOp.ImportGauge(U);
|
||||
DenOp.ImportGauge(U);
|
||||
|
||||
SchurDifferentiableOperator<Impl> Mpc(DenOp);
|
||||
SchurDifferentiableOperator<Impl> Vpc(NumOp);
|
||||
|
||||
// Odd det factors
|
||||
Mpc.MpcDag(etaOdd,PhiOdd);
|
||||
tmp=zero;
|
||||
ActionSolver(Vpc,PhiOdd,tmp);
|
||||
Vpc.Mpc(tmp,PhiOdd);
|
||||
|
||||
// Even det factors
|
||||
DenOp.MooeeDag(etaEven,tmp);
|
||||
NumOp.MooeeInvDag(tmp,PhiEven);
|
||||
|
||||
PhiOdd =PhiOdd*scale;
|
||||
PhiEven=PhiEven*scale;
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// S = phi^dag V (Mdag M)^-1 Vdag phi
|
||||
//////////////////////////////////////////////////////
|
||||
virtual RealD S(const GaugeField &U) {
|
||||
|
||||
NumOp.ImportGauge(U);
|
||||
DenOp.ImportGauge(U);
|
||||
|
||||
SchurDifferentiableOperator<Impl> Mpc(DenOp);
|
||||
SchurDifferentiableOperator<Impl> Vpc(NumOp);
|
||||
|
||||
FermionField X(NumOp.FermionRedBlackGrid());
|
||||
FermionField Y(NumOp.FermionRedBlackGrid());
|
||||
|
||||
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
|
||||
X=zero;
|
||||
ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
|
||||
//Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
|
||||
// Multiply by Ydag
|
||||
RealD action = real(innerProduct(Y,X));
|
||||
|
||||
//RealD action = norm2(Y);
|
||||
|
||||
// The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
|
||||
// Only really clover term that creates this. Leave the EE portion as a future to do to make most
|
||||
// rapid progresss on DWF for now.
|
||||
//
|
||||
NumOp.MooeeDag(PhiEven,X);
|
||||
DenOp.MooeeInvDag(X,Y);
|
||||
action = action + norm2(Y);
|
||||
|
||||
return action;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// dS/du = phi^dag dV (Mdag M)^-1 V^dag phi
|
||||
// - phi^dag V (Mdag M)^-1 [ Mdag dM + dMdag M ] (Mdag M)^-1 V^dag phi
|
||||
// + phi^dag V (Mdag M)^-1 dV^dag phi
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
|
||||
|
||||
NumOp.ImportGauge(U);
|
||||
DenOp.ImportGauge(U);
|
||||
|
||||
SchurDifferentiableOperator<Impl> Mpc(DenOp);
|
||||
SchurDifferentiableOperator<Impl> Vpc(NumOp);
|
||||
|
||||
FermionField X(NumOp.FermionRedBlackGrid());
|
||||
FermionField Y(NumOp.FermionRedBlackGrid());
|
||||
|
||||
// This assignment is necessary to be compliant with the HMC grids
|
||||
GaugeField force(dSdU._grid);
|
||||
|
||||
//Y=Vdag phi
|
||||
//X = (Mdag M)^-1 V^dag phi
|
||||
//Y = (Mdag)^-1 V^dag phi
|
||||
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
|
||||
X=zero;
|
||||
DerivativeSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
|
||||
Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
|
||||
|
||||
// phi^dag V (Mdag M)^-1 dV^dag phi
|
||||
Vpc.MpcDagDeriv(force , X, PhiOdd ); dSdU = force;
|
||||
|
||||
// phi^dag dV (Mdag M)^-1 V^dag phi
|
||||
Vpc.MpcDeriv(force , PhiOdd, X ); dSdU = dSdU+force;
|
||||
|
||||
// - phi^dag V (Mdag M)^-1 Mdag dM (Mdag M)^-1 V^dag phi
|
||||
// - phi^dag V (Mdag M)^-1 dMdag M (Mdag M)^-1 V^dag phi
|
||||
Mpc.MpcDeriv(force,Y,X); dSdU = dSdU-force;
|
||||
Mpc.MpcDagDeriv(force,X,Y); dSdU = dSdU-force;
|
||||
|
||||
// FIXME No force contribution from EvenEven assumed here
|
||||
// Needs a fix for clover.
|
||||
assert(NumOp.ConstEE() == 1);
|
||||
assert(DenOp.ConstEE() == 1);
|
||||
|
||||
dSdU = -dSdU;
|
||||
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
#endif
|
@ -1,50 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/Scalar.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_SCALAR_H
|
||||
#define GRID_QCD_SCALAR_H
|
||||
|
||||
#include <Grid/qcd/action/scalar/ScalarImpl.h>
|
||||
#include <Grid/qcd/action/scalar/ScalarAction.h>
|
||||
#include <Grid/qcd/action/scalar/ScalarInteractionAction.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
typedef ScalarAction<ScalarImplR> ScalarActionR;
|
||||
typedef ScalarAction<ScalarImplF> ScalarActionF;
|
||||
typedef ScalarAction<ScalarImplD> ScalarActionD;
|
||||
|
||||
template <int Colours, int Dimensions> using ScalarAdjActionR = ScalarInteractionAction<ScalarNxNAdjImplR<Colours>, Dimensions>;
|
||||
template <int Colours, int Dimensions> using ScalarAdjActionF = ScalarInteractionAction<ScalarNxNAdjImplF<Colours>, Dimensions>;
|
||||
template <int Colours, int Dimensions> using ScalarAdjActionD = ScalarInteractionAction<ScalarNxNAdjImplD<Colours>, Dimensions>;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // GRID_QCD_SCALAR_H
|
@ -1,83 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef SCALAR_ACTION_H
|
||||
#define SCALAR_ACTION_H
|
||||
|
||||
namespace Grid {
|
||||
// FIXME drop the QCD namespace everywhere here
|
||||
|
||||
template <class Impl>
|
||||
class ScalarAction : public QCD::Action<typename Impl::Field> {
|
||||
public:
|
||||
INHERIT_FIELD_TYPES(Impl);
|
||||
|
||||
private:
|
||||
RealD mass_square;
|
||||
RealD lambda;
|
||||
|
||||
public:
|
||||
ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {}
|
||||
|
||||
virtual std::string LogParameters() {
|
||||
std::stringstream sstream;
|
||||
sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl;
|
||||
sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl;
|
||||
return sstream.str();
|
||||
}
|
||||
virtual std::string action_name() {return "ScalarAction";}
|
||||
|
||||
virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} // noop as no pseudoferms
|
||||
|
||||
virtual RealD S(const Field &p) {
|
||||
return (mass_square * 0.5 + QCD::Nd) * ScalarObs<Impl>::sumphisquared(p) +
|
||||
(lambda / 24.) * ScalarObs<Impl>::sumphifourth(p) +
|
||||
ScalarObs<Impl>::sumphider(p);
|
||||
};
|
||||
|
||||
virtual void deriv(const Field &p,
|
||||
Field &force) {
|
||||
Field tmp(p._grid);
|
||||
Field p2(p._grid);
|
||||
ScalarObs<Impl>::phisquared(p2, p);
|
||||
tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1));
|
||||
for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1);
|
||||
|
||||
force =+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) * p2 * p + tmp;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace Grid
|
||||
|
||||
#endif // SCALAR_ACTION_H
|
@ -1,263 +0,0 @@
|
||||
#ifndef SCALAR_IMPL
|
||||
#define SCALAR_IMPL
|
||||
|
||||
|
||||
namespace Grid {
|
||||
//namespace QCD {
|
||||
|
||||
template <class S>
|
||||
class ScalarImplTypes {
|
||||
public:
|
||||
typedef S Simd;
|
||||
|
||||
template <typename vtype>
|
||||
using iImplField = iScalar<iScalar<iScalar<vtype> > >;
|
||||
|
||||
typedef iImplField<Simd> SiteField;
|
||||
typedef SiteField SitePropagator;
|
||||
typedef SiteField SiteComplex;
|
||||
|
||||
typedef Lattice<SiteField> Field;
|
||||
typedef Field ComplexField;
|
||||
typedef Field FermionField;
|
||||
typedef Field PropagatorField;
|
||||
|
||||
static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){
|
||||
gaussian(pRNG, P);
|
||||
}
|
||||
|
||||
static inline Field projectForce(Field& P){return P;}
|
||||
|
||||
static inline void update_field(Field& P, Field& U, double ep) {
|
||||
U += P*ep;
|
||||
}
|
||||
|
||||
static inline RealD FieldSquareNorm(Field& U) {
|
||||
return (- sum(trace(U*U))/2.0);
|
||||
}
|
||||
|
||||
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
gaussian(pRNG, U);
|
||||
}
|
||||
|
||||
static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
gaussian(pRNG, U);
|
||||
}
|
||||
|
||||
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
U = 1.0;
|
||||
}
|
||||
|
||||
static void MomentumSpacePropagator(Field &out, RealD m)
|
||||
{
|
||||
GridBase *grid = out._grid;
|
||||
Field kmu(grid), one(grid);
|
||||
const unsigned int nd = grid->_ndimension;
|
||||
std::vector<int> &l = grid->_fdimensions;
|
||||
|
||||
one = Complex(1.0,0.0);
|
||||
out = m*m;
|
||||
for(int mu = 0; mu < nd; mu++)
|
||||
{
|
||||
Real twoPiL = M_PI*2./l[mu];
|
||||
|
||||
LatticeCoordinate(kmu,mu);
|
||||
kmu = 2.*sin(.5*twoPiL*kmu);
|
||||
out = out + kmu*kmu;
|
||||
}
|
||||
out = one/out;
|
||||
}
|
||||
|
||||
static void FreePropagator(const Field &in, Field &out,
|
||||
const Field &momKernel)
|
||||
{
|
||||
FFT fft((GridCartesian *)in._grid);
|
||||
Field inFT(in._grid);
|
||||
|
||||
fft.FFT_all_dim(inFT, in, FFT::forward);
|
||||
inFT = inFT*momKernel;
|
||||
fft.FFT_all_dim(out, inFT, FFT::backward);
|
||||
}
|
||||
|
||||
static void FreePropagator(const Field &in, Field &out, RealD m)
|
||||
{
|
||||
Field momKernel(in._grid);
|
||||
|
||||
MomentumSpacePropagator(momKernel, m);
|
||||
FreePropagator(in, out, momKernel);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#ifdef USE_FFT_ACCELERATION
|
||||
#ifndef FFT_MASS
|
||||
#error "USE_FFT_ACCELERATION is defined but not FFT_MASS"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <class S, unsigned int N>
|
||||
class ScalarAdjMatrixImplTypes {
|
||||
public:
|
||||
typedef S Simd;
|
||||
typedef QCD::SU<N> Group;
|
||||
|
||||
template <typename vtype>
|
||||
using iImplField = iScalar<iScalar<iMatrix<vtype, N>>>;
|
||||
template <typename vtype>
|
||||
using iImplComplex = iScalar<iScalar<iScalar<vtype>>>;
|
||||
|
||||
typedef iImplField<Simd> SiteField;
|
||||
typedef SiteField SitePropagator;
|
||||
typedef iImplComplex<Simd> SiteComplex;
|
||||
|
||||
typedef Lattice<SiteField> Field;
|
||||
typedef Lattice<SiteComplex> ComplexField;
|
||||
typedef Field FermionField;
|
||||
typedef Field PropagatorField;
|
||||
|
||||
static void MomentaSquare(ComplexField &out)
|
||||
{
|
||||
GridBase *grid = out._grid;
|
||||
const std::vector<int> &l = grid->FullDimensions();
|
||||
ComplexField kmu(grid);
|
||||
|
||||
for (int mu = 0; mu < grid->Nd(); mu++)
|
||||
{
|
||||
Real twoPiL = M_PI * 2.0 / l[mu];
|
||||
LatticeCoordinate(kmu, mu);
|
||||
kmu = 2.0 * sin(0.5 * twoPiL * kmu);
|
||||
out += kmu * kmu;
|
||||
}
|
||||
}
|
||||
|
||||
static void MomentumSpacePropagator(ComplexField &out, RealD m)
|
||||
{
|
||||
GridBase *grid = out._grid;
|
||||
ComplexField one(grid);
|
||||
one = Complex(1.0, 0.0);
|
||||
out = m * m;
|
||||
MomentaSquare(out);
|
||||
out = one / out;
|
||||
}
|
||||
|
||||
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG)
|
||||
{
|
||||
#ifndef USE_FFT_ACCELERATION
|
||||
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P);
|
||||
#else
|
||||
|
||||
Field Pgaussian(P._grid), Pp(P._grid);
|
||||
ComplexField p2(P._grid); p2 = zero;
|
||||
RealD M = FFT_MASS;
|
||||
|
||||
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian);
|
||||
|
||||
FFT theFFT((GridCartesian*)P._grid);
|
||||
theFFT.FFT_all_dim(Pp, Pgaussian, FFT::forward);
|
||||
MomentaSquare(p2);
|
||||
p2 += M * M;
|
||||
p2 = sqrt(p2);
|
||||
Pp *= p2;
|
||||
theFFT.FFT_all_dim(P, Pp, FFT::backward);
|
||||
|
||||
#endif //USE_FFT_ACCELERATION
|
||||
}
|
||||
|
||||
static inline Field projectForce(Field& P) {return P;}
|
||||
|
||||
static inline void update_field(Field &P, Field &U, double ep)
|
||||
{
|
||||
#ifndef USE_FFT_ACCELERATION
|
||||
double t0=usecond();
|
||||
U += P * ep;
|
||||
double t1=usecond();
|
||||
double total_time = (t1-t0)/1e6;
|
||||
std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl;
|
||||
#else
|
||||
// FFT transform P(x) -> P(p)
|
||||
// divide by (M^2+p^2) M external parameter (how to pass?)
|
||||
// P'(p) = P(p)/(M^2+p^2)
|
||||
// Transform back -> P'(x)
|
||||
// U += P'(x)*ep
|
||||
|
||||
Field Pp(U._grid), P_FFT(U._grid);
|
||||
static ComplexField p2(U._grid);
|
||||
RealD M = FFT_MASS;
|
||||
|
||||
FFT theFFT((GridCartesian*)U._grid);
|
||||
theFFT.FFT_all_dim(Pp, P, FFT::forward);
|
||||
|
||||
static bool first_call = true;
|
||||
if (first_call)
|
||||
{
|
||||
// avoid recomputing
|
||||
MomentumSpacePropagator(p2, M);
|
||||
first_call = false;
|
||||
}
|
||||
Pp *= p2;
|
||||
theFFT.FFT_all_dim(P_FFT, Pp, FFT::backward);
|
||||
U += P_FFT * ep;
|
||||
|
||||
#endif //USE_FFT_ACCELERATION
|
||||
}
|
||||
|
||||
static inline RealD FieldSquareNorm(Field &U)
|
||||
{
|
||||
#ifndef USE_FFT_ACCELERATION
|
||||
return (TensorRemove(sum(trace(U * U))).real());
|
||||
#else
|
||||
// In case of Fourier acceleration we have to:
|
||||
// compute U(p)*U(p)/(M^2+p^2)) Parseval theorem
|
||||
// 1 FFT needed U(x) -> U(p)
|
||||
// M to be passed
|
||||
|
||||
FFT theFFT((GridCartesian*)U._grid);
|
||||
Field Up(U._grid);
|
||||
|
||||
theFFT.FFT_all_dim(Up, U, FFT::forward);
|
||||
RealD M = FFT_MASS;
|
||||
ComplexField p2(U._grid);
|
||||
MomentumSpacePropagator(p2, M);
|
||||
Field Up2 = Up * p2;
|
||||
// from the definition of the DFT we need to divide by the volume
|
||||
return (-TensorRemove(sum(trace(adj(Up) * Up2))).real() / U._grid->gSites());
|
||||
#endif //USE_FFT_ACCELERATION
|
||||
}
|
||||
|
||||
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U);
|
||||
}
|
||||
|
||||
static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U, 0.01);
|
||||
}
|
||||
|
||||
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||
U = zero;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
typedef ScalarImplTypes<vReal> ScalarImplR;
|
||||
typedef ScalarImplTypes<vRealF> ScalarImplF;
|
||||
typedef ScalarImplTypes<vRealD> ScalarImplD;
|
||||
typedef ScalarImplTypes<vComplex> ScalarImplCR;
|
||||
typedef ScalarImplTypes<vComplexF> ScalarImplCF;
|
||||
typedef ScalarImplTypes<vComplexD> ScalarImplCD;
|
||||
|
||||
// Hardcoding here the size of the matrices
|
||||
typedef ScalarAdjMatrixImplTypes<vComplex, QCD::Nc> ScalarAdjImplR;
|
||||
typedef ScalarAdjMatrixImplTypes<vComplexF, QCD::Nc> ScalarAdjImplF;
|
||||
typedef ScalarAdjMatrixImplTypes<vComplexD, QCD::Nc> ScalarAdjImplD;
|
||||
|
||||
template <int Colours > using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes<vComplex, Colours >;
|
||||
template <int Colours > using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes<vComplexF, Colours >;
|
||||
template <int Colours > using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes<vComplexD, Colours >;
|
||||
|
||||
//}
|
||||
}
|
||||
|
||||
#endif
|
@ -1,208 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Guido Cossu <guido,cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef SCALAR_INT_ACTION_H
|
||||
#define SCALAR_INT_ACTION_H
|
||||
|
||||
// Note: this action can completely absorb the ScalarAction for real float fields
|
||||
// use the scalarObjs to generalise the structure
|
||||
|
||||
namespace Grid
|
||||
{
|
||||
// FIXME drop the QCD namespace everywhere here
|
||||
|
||||
template <class Impl, int Ndim>
|
||||
class ScalarInteractionAction : public QCD::Action<typename Impl::Field>
|
||||
{
|
||||
public:
|
||||
INHERIT_FIELD_TYPES(Impl);
|
||||
|
||||
private:
|
||||
RealD mass_square;
|
||||
RealD lambda;
|
||||
RealD g;
|
||||
const unsigned int N = Impl::Group::Dimension;
|
||||
|
||||
typedef typename Field::vector_object vobj;
|
||||
typedef CartesianStencil<vobj, vobj> Stencil;
|
||||
|
||||
SimpleCompressor<vobj> compressor;
|
||||
int npoint = 2 * Ndim;
|
||||
std::vector<int> directions; //
|
||||
std::vector<int> displacements; //
|
||||
|
||||
public:
|
||||
ScalarInteractionAction(RealD ms, RealD l, RealD gval) : mass_square(ms), lambda(l), g(gval), displacements(2 * Ndim, 0), directions(2 * Ndim, 0)
|
||||
{
|
||||
for (int mu = 0; mu < Ndim; mu++)
|
||||
{
|
||||
directions[mu] = mu;
|
||||
directions[mu + Ndim] = mu;
|
||||
displacements[mu] = 1;
|
||||
displacements[mu + Ndim] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
virtual std::string LogParameters()
|
||||
{
|
||||
std::stringstream sstream;
|
||||
sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl;
|
||||
sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl;
|
||||
sstream << GridLogMessage << "[ScalarAction] g : " << g << std::endl;
|
||||
return sstream.str();
|
||||
}
|
||||
|
||||
virtual std::string action_name() { return "ScalarAction"; }
|
||||
|
||||
virtual void refresh(const Field &U, GridParallelRNG &pRNG) {}
|
||||
|
||||
virtual RealD S(const Field &p)
|
||||
{
|
||||
assert(p._grid->Nd() == Ndim);
|
||||
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
|
||||
phiStencil.HaloExchange(p, compressor);
|
||||
Field action(p._grid), pshift(p._grid), phisquared(p._grid);
|
||||
phisquared = p * p;
|
||||
action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared;
|
||||
for (int mu = 0; mu < Ndim; mu++)
|
||||
{
|
||||
// pshift = Cshift(p, mu, +1); // not efficient, implement with stencils
|
||||
parallel_for(int i = 0; i < p._grid->oSites(); i++)
|
||||
{
|
||||
int permute_type;
|
||||
StencilEntry *SE;
|
||||
vobj temp2;
|
||||
const vobj *temp, *t_p;
|
||||
|
||||
SE = phiStencil.GetEntry(permute_type, mu, i);
|
||||
t_p = &p._odata[i];
|
||||
if (SE->_is_local)
|
||||
{
|
||||
temp = &p._odata[SE->_offset];
|
||||
if (SE->_permute)
|
||||
{
|
||||
permute(temp2, *temp, permute_type);
|
||||
action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2;
|
||||
}
|
||||
else
|
||||
{
|
||||
action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
}
|
||||
// action -= pshift*p + p*pshift;
|
||||
}
|
||||
// NB the trace in the algebra is normalised to 1/2
|
||||
// minus sign coming from the antihermitian fields
|
||||
return -(TensorRemove(sum(trace(action)))).real() * N / g;
|
||||
};
|
||||
|
||||
virtual void deriv(const Field &p, Field &force)
|
||||
{
|
||||
double t0 = usecond();
|
||||
assert(p._grid->Nd() == Ndim);
|
||||
force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p;
|
||||
double interm_t = usecond();
|
||||
|
||||
// move this outside
|
||||
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
|
||||
|
||||
phiStencil.HaloExchange(p, compressor);
|
||||
double halo_t = usecond();
|
||||
int chunk = 128;
|
||||
//for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1);
|
||||
|
||||
// inverting the order of the loops slows down the code(! g++ 7)
|
||||
// cannot try to reduce the number of force writes by factor npoint...
|
||||
// use cache blocking
|
||||
for (int point = 0; point < npoint; point++)
|
||||
{
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
int permute_type;
|
||||
StencilEntry *SE;
|
||||
const vobj *temp;
|
||||
|
||||
#pragma omp for schedule(static, chunk)
|
||||
for (int i = 0; i < p._grid->oSites(); i++)
|
||||
{
|
||||
SE = phiStencil.GetEntry(permute_type, point, i);
|
||||
// prefetch next p?
|
||||
|
||||
if (SE->_is_local)
|
||||
{
|
||||
temp = &p._odata[SE->_offset];
|
||||
|
||||
if (SE->_permute)
|
||||
{
|
||||
vobj temp2;
|
||||
permute(temp2, *temp, permute_type);
|
||||
force._odata[i] -= temp2;
|
||||
}
|
||||
else
|
||||
{
|
||||
force._odata[i] -= *temp; // slow part. Dominated by this read/write (BW)
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
force._odata[i] -= phiStencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
force *= N / g;
|
||||
|
||||
double t1 = usecond();
|
||||
double total_time = (t1 - t0) / 1e6;
|
||||
double interm_time = (interm_t - t0) / 1e6;
|
||||
double halo_time = (halo_t - interm_t) / 1e6;
|
||||
double stencil_time = (t1 - halo_t) / 1e6;
|
||||
std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl;
|
||||
std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl;
|
||||
double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2);
|
||||
double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2);
|
||||
double Gflops = flops / (total_time * 1e9);
|
||||
double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
|
||||
std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl;
|
||||
std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Grid
|
||||
|
||||
#endif // SCALAR_INT_ACTION_H
|
@ -1,219 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/GenericHmcRunner.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_GENERIC_HMC_RUNNER
|
||||
#define GRID_GENERIC_HMC_RUNNER
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
// very ugly here but possibly resolved if we had a base Reader class
|
||||
template < class ReaderClass >
|
||||
class HMCRunnerBase {
|
||||
public:
|
||||
virtual void Run() = 0;
|
||||
virtual void initialize(ReaderClass& ) = 0;
|
||||
};
|
||||
|
||||
|
||||
template <class Implementation,
|
||||
template <typename, typename, typename> class Integrator,
|
||||
class RepresentationsPolicy = NoHirep, class ReaderClass = XmlReader>
|
||||
class HMCWrapperTemplate: public HMCRunnerBase<ReaderClass> {
|
||||
public:
|
||||
INHERIT_FIELD_TYPES(Implementation);
|
||||
typedef Implementation ImplPolicy; // visible from outside
|
||||
template <typename S = NoSmearing<Implementation> >
|
||||
using IntegratorType = Integrator<Implementation, S, RepresentationsPolicy>;
|
||||
|
||||
HMCparameters Parameters;
|
||||
std::string ParameterFile;
|
||||
HMCResourceManager<Implementation> Resources;
|
||||
|
||||
// The set of actions (keep here for lower level users, for now)
|
||||
ActionSet<Field, RepresentationsPolicy> TheAction;
|
||||
|
||||
HMCWrapperTemplate() = default;
|
||||
|
||||
HMCWrapperTemplate(HMCparameters Par){
|
||||
Parameters = Par;
|
||||
}
|
||||
|
||||
void initialize(ReaderClass & TheReader){
|
||||
std::cout << "Initialization of the HMC" << std::endl;
|
||||
Resources.initialize(TheReader);
|
||||
|
||||
// eventually add smearing
|
||||
|
||||
Resources.GetActionSet(TheAction);
|
||||
}
|
||||
|
||||
|
||||
void ReadCommandLine(int argc, char **argv) {
|
||||
std::string arg;
|
||||
|
||||
if (GridCmdOptionExists(argv, argv + argc, "--StartingType")) {
|
||||
arg = GridCmdOptionPayload(argv, argv + argc, "--StartingType");
|
||||
|
||||
if (arg != "HotStart" && arg != "ColdStart" && arg != "TepidStart" &&
|
||||
arg != "CheckpointStart") {
|
||||
std::cout << GridLogError << "Unrecognized option in --StartingType\n";
|
||||
std::cout
|
||||
<< GridLogError
|
||||
<< "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||
exit(1);
|
||||
}
|
||||
Parameters.StartingType = arg;
|
||||
}
|
||||
|
||||
if (GridCmdOptionExists(argv, argv + argc, "--StartingTrajectory")) {
|
||||
arg = GridCmdOptionPayload(argv, argv + argc, "--StartingTrajectory");
|
||||
std::vector<int> ivec(0);
|
||||
GridCmdOptionIntVector(arg, ivec);
|
||||
Parameters.StartTrajectory = ivec[0];
|
||||
}
|
||||
|
||||
if (GridCmdOptionExists(argv, argv + argc, "--Trajectories")) {
|
||||
arg = GridCmdOptionPayload(argv, argv + argc, "--Trajectories");
|
||||
std::vector<int> ivec(0);
|
||||
GridCmdOptionIntVector(arg, ivec);
|
||||
Parameters.Trajectories = ivec[0];
|
||||
}
|
||||
|
||||
if (GridCmdOptionExists(argv, argv + argc, "--Thermalizations")) {
|
||||
arg = GridCmdOptionPayload(argv, argv + argc, "--Thermalizations");
|
||||
std::vector<int> ivec(0);
|
||||
GridCmdOptionIntVector(arg, ivec);
|
||||
Parameters.NoMetropolisUntil = ivec[0];
|
||||
}
|
||||
if (GridCmdOptionExists(argv, argv + argc, "--ParameterFile")) {
|
||||
arg = GridCmdOptionPayload(argv, argv + argc, "--ParameterFile");
|
||||
ParameterFile = arg;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class SmearingPolicy>
|
||||
void Run(SmearingPolicy &S) {
|
||||
Runner(S);
|
||||
}
|
||||
|
||||
void Run(){
|
||||
NoSmearing<Implementation> S;
|
||||
Runner(S);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
||||
private:
|
||||
template <class SmearingPolicy>
|
||||
void Runner(SmearingPolicy &Smearing) {
|
||||
auto UGrid = Resources.GetCartesian();
|
||||
Resources.AddRNGs();
|
||||
Field U(UGrid);
|
||||
|
||||
// Can move this outside?
|
||||
typedef IntegratorType<SmearingPolicy> TheIntegrator;
|
||||
TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
|
||||
|
||||
if (Parameters.StartingType == "HotStart") {
|
||||
// Hot start
|
||||
Resources.SeedFixedIntegers();
|
||||
Implementation::HotConfiguration(Resources.GetParallelRNG(), U);
|
||||
} else if (Parameters.StartingType == "ColdStart") {
|
||||
// Cold start
|
||||
Resources.SeedFixedIntegers();
|
||||
Implementation::ColdConfiguration(Resources.GetParallelRNG(), U);
|
||||
} else if (Parameters.StartingType == "TepidStart") {
|
||||
// Tepid start
|
||||
Resources.SeedFixedIntegers();
|
||||
Implementation::TepidConfiguration(Resources.GetParallelRNG(), U);
|
||||
} else if (Parameters.StartingType == "CheckpointStart") {
|
||||
// CheckpointRestart
|
||||
Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U,
|
||||
Resources.GetSerialRNG(),
|
||||
Resources.GetParallelRNG());
|
||||
}
|
||||
|
||||
Smearing.set_Field(U);
|
||||
|
||||
HybridMonteCarlo<TheIntegrator> HMC(Parameters, MDynamics,
|
||||
Resources.GetSerialRNG(),
|
||||
Resources.GetParallelRNG(),
|
||||
Resources.GetObservables(), U);
|
||||
|
||||
// Run it
|
||||
HMC.evolve();
|
||||
}
|
||||
};
|
||||
|
||||
// These are for gauge fields, default integrator MinimumNorm2
|
||||
template <template <typename, typename, typename> class Integrator>
|
||||
using GenericHMCRunner = HMCWrapperTemplate<PeriodicGimplR, Integrator>;
|
||||
template <template <typename, typename, typename> class Integrator>
|
||||
using GenericHMCRunnerF = HMCWrapperTemplate<PeriodicGimplF, Integrator>;
|
||||
template <template <typename, typename, typename> class Integrator>
|
||||
using GenericHMCRunnerD = HMCWrapperTemplate<PeriodicGimplD, Integrator>;
|
||||
|
||||
|
||||
// These are for gauge fields, default integrator MinimumNorm2
|
||||
template <template <typename, typename, typename> class Integrator>
|
||||
using ConjugateHMCRunner = HMCWrapperTemplate<ConjugateGimplR, Integrator>;
|
||||
template <template <typename, typename, typename> class Integrator>
|
||||
using ConjugateHMCRunnerF = HMCWrapperTemplate<ConjugateGimplF, Integrator>;
|
||||
template <template <typename, typename, typename> class Integrator>
|
||||
using ConjugateHMCRunnerD = HMCWrapperTemplate<ConjugateGimplD, Integrator>;
|
||||
|
||||
|
||||
|
||||
template <class RepresentationsPolicy,
|
||||
template <typename, typename, typename> class Integrator>
|
||||
using GenericHMCRunnerHirep =
|
||||
HMCWrapperTemplate<PeriodicGimplR, Integrator, RepresentationsPolicy>;
|
||||
|
||||
template <class Implementation, class RepresentationsPolicy,
|
||||
template <typename, typename, typename> class Integrator>
|
||||
using GenericHMCRunnerTemplate = HMCWrapperTemplate<Implementation, Integrator, RepresentationsPolicy>;
|
||||
|
||||
typedef HMCWrapperTemplate<ScalarImplR, MinimumNorm2, ScalarFields>
|
||||
ScalarGenericHMCRunner;
|
||||
|
||||
typedef HMCWrapperTemplate<ScalarAdjImplR, MinimumNorm2, ScalarMatrixFields>
|
||||
ScalarAdjGenericHMCRunner;
|
||||
|
||||
template <int Colours>
|
||||
using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR<Colours>, ForceGradient, ScalarNxNMatrixFields<Colours> >;
|
||||
|
||||
} // namespace QCD
|
||||
} // namespace Grid
|
||||
|
||||
#endif // GRID_GENERIC_HMC_RUNNER
|
@ -1,111 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/GenericHmcRunner.h
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_HMC_MODULES
|
||||
#define GRID_HMC_MODULES
|
||||
|
||||
|
||||
#include "HMC_GridModules.h"
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
struct RNGModuleParameters: Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(RNGModuleParameters,
|
||||
std::string, serial_seeds,
|
||||
std::string, parallel_seeds,);
|
||||
|
||||
std::vector<int> getSerialSeeds(){return strToVec<int>(serial_seeds);}
|
||||
std::vector<int> getParallelSeeds(){return strToVec<int>(parallel_seeds);}
|
||||
|
||||
RNGModuleParameters(): serial_seeds("1"), parallel_seeds("1"){}
|
||||
|
||||
template <class ReaderClass >
|
||||
RNGModuleParameters(Reader<ReaderClass>& Reader){
|
||||
read(Reader, "RandomNumberGenerator", *this);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Random number generators module
|
||||
class RNGModule{
|
||||
GridSerialRNG sRNG_;
|
||||
std::unique_ptr<GridParallelRNG> pRNG_;
|
||||
RNGModuleParameters Params_;
|
||||
|
||||
public:
|
||||
|
||||
RNGModule(){};
|
||||
|
||||
void set_pRNG(GridParallelRNG* pRNG){
|
||||
pRNG_.reset(pRNG);
|
||||
}
|
||||
|
||||
void set_RNGSeeds(RNGModuleParameters& Params) {
|
||||
Params_ = Params;
|
||||
}
|
||||
|
||||
GridSerialRNG& get_sRNG() { return sRNG_; }
|
||||
GridParallelRNG& get_pRNG() { return *pRNG_.get(); }
|
||||
|
||||
void seed() {
|
||||
auto SerialSeeds = Params_.getSerialSeeds();
|
||||
auto ParallelSeeds = Params_.getParallelSeeds();
|
||||
if (SerialSeeds.size() == 0 && ParallelSeeds.size() == 0) {
|
||||
std::cout << GridLogError << "Seeds not initialized" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
sRNG_.SeedFixedIntegers(SerialSeeds);
|
||||
pRNG_->SeedFixedIntegers(ParallelSeeds);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
///////////////////////////////////////////////////////////////////
|
||||
/// Smearing module
|
||||
template <class ImplementationPolicy>
|
||||
class SmearingModule{
|
||||
virtual void get_smearing();
|
||||
};
|
||||
|
||||
template <class ImplementationPolicy>
|
||||
class StoutSmearingModule: public SmearingModule<ImplementationPolicy>{
|
||||
SmearedConfiguration<ImplementationPolicy> SmearingPolicy;
|
||||
};
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
} // namespace QCD
|
||||
} // namespace Grid
|
||||
|
||||
#endif // GRID_HMC_MODULES
|
@ -1,328 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/GenericHmcRunner.h
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef HMC_RESOURCE_MANAGER_H
|
||||
#define HMC_RESOURCE_MANAGER_H
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
// One function per Checkpointer, use a macro to simplify
|
||||
#define RegisterLoadCheckPointerFunction(NAME) \
|
||||
void Load##NAME##Checkpointer(const CheckpointerParameters& Params_) { \
|
||||
if (!have_CheckPointer) { \
|
||||
std::cout << GridLogDebug << "Loading Checkpointer " << #NAME \
|
||||
<< std::endl; \
|
||||
CP = std::unique_ptr<CheckpointerBaseModule>( \
|
||||
new NAME##CPModule<ImplementationPolicy>(Params_)); \
|
||||
have_CheckPointer = true; \
|
||||
} else { \
|
||||
std::cout << GridLogError << "Checkpointer already loaded " \
|
||||
<< std::endl; \
|
||||
exit(1); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define RegisterLoadCheckPointerMetadataFunction(NAME) \
|
||||
template < class Metadata > \
|
||||
void Load##NAME##Checkpointer(const CheckpointerParameters& Params_, const Metadata& M_) { \
|
||||
if (!have_CheckPointer) { \
|
||||
std::cout << GridLogDebug << "Loading Metadata Checkpointer " << #NAME \
|
||||
<< std::endl; \
|
||||
CP = std::unique_ptr<CheckpointerBaseModule>( \
|
||||
new NAME##CPModule<ImplementationPolicy, Metadata >(Params_, M_)); \
|
||||
have_CheckPointer = true; \
|
||||
} else { \
|
||||
std::cout << GridLogError << "Checkpointer already loaded " \
|
||||
<< std::endl; \
|
||||
exit(1); \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// HMC Resource manager
|
||||
template <class ImplementationPolicy>
|
||||
class HMCResourceManager {
|
||||
typedef HMCModuleBase< QCD::BaseHmcCheckpointer<ImplementationPolicy> > CheckpointerBaseModule;
|
||||
typedef HMCModuleBase< QCD::HmcObservable<typename ImplementationPolicy::Field> > ObservableBaseModule;
|
||||
typedef ActionModuleBase< QCD::Action<typename ImplementationPolicy::Field>, GridModule > ActionBaseModule;
|
||||
|
||||
// Named storage for grid pairs (std + red-black)
|
||||
std::unordered_map<std::string, GridModule> Grids;
|
||||
RNGModule RNGs;
|
||||
|
||||
// SmearingModule<ImplementationPolicy> Smearing;
|
||||
std::unique_ptr<CheckpointerBaseModule> CP;
|
||||
|
||||
// A vector of HmcObservable modules
|
||||
std::vector<std::unique_ptr<ObservableBaseModule> > ObservablesList;
|
||||
|
||||
|
||||
// A vector of HmcObservable modules
|
||||
std::multimap<int, std::unique_ptr<ActionBaseModule> > ActionsList;
|
||||
std::vector<int> multipliers;
|
||||
|
||||
bool have_RNG;
|
||||
bool have_CheckPointer;
|
||||
|
||||
// NOTE: operator << is not overloaded for std::vector<string>
|
||||
// so this function is necessary
|
||||
void output_vector_string(const std::vector<std::string> &vs){
|
||||
for (auto &i: vs)
|
||||
std::cout << i << " ";
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
HMCResourceManager() : have_RNG(false), have_CheckPointer(false) {}
|
||||
|
||||
template <class ReaderClass, class vector_type = vComplex >
|
||||
void initialize(ReaderClass &Read){
|
||||
// assumes we are starting from the main node
|
||||
|
||||
// Geometry
|
||||
GridModuleParameters GridPar(Read);
|
||||
GridFourDimModule<vector_type> GridMod( GridPar) ;
|
||||
AddGrid("gauge", GridMod);
|
||||
|
||||
// Checkpointer
|
||||
auto &CPfactory = HMC_CPModuleFactory<cp_string, ImplementationPolicy, ReaderClass >::getInstance();
|
||||
Read.push("Checkpointer");
|
||||
std::string cp_type;
|
||||
read(Read,"name", cp_type);
|
||||
std::cout << "Registered types " << std::endl;
|
||||
output_vector_string(CPfactory.getBuilderList());
|
||||
|
||||
|
||||
CP = CPfactory.create(cp_type, Read);
|
||||
CP->print_parameters();
|
||||
Read.pop();
|
||||
have_CheckPointer = true;
|
||||
|
||||
RNGModuleParameters RNGpar(Read);
|
||||
SetRNGSeeds(RNGpar);
|
||||
|
||||
// Observables
|
||||
auto &ObsFactory = HMC_ObservablesModuleFactory<observable_string, typename ImplementationPolicy::Field, ReaderClass>::getInstance();
|
||||
Read.push(observable_string);// here must check if existing...
|
||||
do {
|
||||
std::string obs_type;
|
||||
read(Read,"name", obs_type);
|
||||
std::cout << "Registered types " << std::endl;
|
||||
output_vector_string(ObsFactory.getBuilderList() );
|
||||
|
||||
ObservablesList.emplace_back(ObsFactory.create(obs_type, Read));
|
||||
ObservablesList[ObservablesList.size() - 1]->print_parameters();
|
||||
} while (Read.nextElement(observable_string));
|
||||
Read.pop();
|
||||
|
||||
// Loop on levels
|
||||
if(!Read.push("Actions")){
|
||||
std::cout << "Actions not found" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if(!Read.push("Level")){// push must check if the node exist
|
||||
std::cout << "Level not found" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
do
|
||||
{
|
||||
fill_ActionsLevel(Read);
|
||||
}
|
||||
while(Read.push("Level"));
|
||||
|
||||
Read.pop();
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <class RepresentationPolicy>
|
||||
void GetActionSet(ActionSet<typename ImplementationPolicy::Field, RepresentationPolicy>& Aset){
|
||||
Aset.resize(multipliers.size());
|
||||
|
||||
for(auto it = ActionsList.begin(); it != ActionsList.end(); it++){
|
||||
(*it).second->acquireResource(Grids["gauge"]);
|
||||
Aset[(*it).first-1].push_back((*it).second->getPtr());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Grids
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
void AddGrid(const std::string s, GridModule& M) {
|
||||
// Check for name clashes
|
||||
auto search = Grids.find(s);
|
||||
if (search != Grids.end()) {
|
||||
std::cout << GridLogError << "Grid with name \"" << search->first
|
||||
<< "\" already present. Terminating\n";
|
||||
exit(1);
|
||||
}
|
||||
Grids[s] = std::move(M);
|
||||
std::cout << GridLogMessage << "::::::::::::::::::::::::::::::::::::::::" <<std::endl;
|
||||
std::cout << GridLogMessage << "HMCResourceManager:" << std::endl;
|
||||
std::cout << GridLogMessage << "Created grid set with name '" << s << "' and decomposition for the full cartesian " << std::endl;
|
||||
Grids[s].show_full_decomposition();
|
||||
std::cout << GridLogMessage << "::::::::::::::::::::::::::::::::::::::::" <<std::endl;
|
||||
}
|
||||
|
||||
// Add a named grid set, 4d shortcut
|
||||
void AddFourDimGrid(const std::string s) {
|
||||
GridFourDimModule<vComplex> Mod;
|
||||
AddGrid(s, Mod);
|
||||
}
|
||||
|
||||
// Add a named grid set, 4d shortcut + tweak simd lanes
|
||||
void AddFourDimGrid(const std::string s, const std::vector<int> simd_decomposition) {
|
||||
GridFourDimModule<vComplex> Mod(simd_decomposition);
|
||||
AddGrid(s, Mod);
|
||||
}
|
||||
|
||||
|
||||
GridCartesian* GetCartesian(std::string s = "") {
|
||||
if (s.empty()) s = Grids.begin()->first;
|
||||
std::cout << GridLogDebug << "Getting cartesian grid from: " << s
|
||||
<< std::endl;
|
||||
return Grids[s].get_full();
|
||||
}
|
||||
|
||||
GridRedBlackCartesian* GetRBCartesian(std::string s = "") {
|
||||
if (s.empty()) s = Grids.begin()->first;
|
||||
std::cout << GridLogDebug << "Getting rb-cartesian grid from: " << s
|
||||
<< std::endl;
|
||||
return Grids[s].get_rb();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Random number generators
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
void AddRNGs(std::string s = "") {
|
||||
// Couple the RNGs to the GridModule tagged by s
|
||||
// the default is the first grid registered
|
||||
assert(Grids.size() > 0 && !have_RNG);
|
||||
if (s.empty()) s = Grids.begin()->first;
|
||||
std::cout << GridLogDebug << "Adding RNG to grid: " << s << std::endl;
|
||||
RNGs.set_pRNG(new GridParallelRNG(GetCartesian(s)));
|
||||
have_RNG = true;
|
||||
}
|
||||
|
||||
void SetRNGSeeds(RNGModuleParameters& Params) { RNGs.set_RNGSeeds(Params); }
|
||||
|
||||
GridSerialRNG& GetSerialRNG() { return RNGs.get_sRNG(); }
|
||||
|
||||
GridParallelRNG& GetParallelRNG() {
|
||||
assert(have_RNG);
|
||||
return RNGs.get_pRNG();
|
||||
}
|
||||
|
||||
void SeedFixedIntegers() {
|
||||
assert(have_RNG);
|
||||
RNGs.seed();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Checkpointers
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
BaseHmcCheckpointer<ImplementationPolicy>* GetCheckPointer() {
|
||||
if (have_CheckPointer)
|
||||
return CP->getPtr();
|
||||
else {
|
||||
std::cout << GridLogError << "Error: no checkpointer defined"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
RegisterLoadCheckPointerFunction(Binary);
|
||||
RegisterLoadCheckPointerFunction(Nersc);
|
||||
#ifdef HAVE_LIME
|
||||
RegisterLoadCheckPointerFunction(ILDG);
|
||||
RegisterLoadCheckPointerMetadataFunction(Scidac);
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Observables
|
||||
////////////////////////////////////////////////////////
|
||||
|
||||
template<class T, class... Types>
|
||||
void AddObservable(Types&&... Args){
|
||||
ObservablesList.push_back(std::unique_ptr<T>(new T(std::forward<Types>(Args)...)));
|
||||
ObservablesList.back()->print_parameters();
|
||||
}
|
||||
|
||||
std::vector<HmcObservable<typename ImplementationPolicy::Field>* > GetObservables(){
|
||||
std::vector<HmcObservable<typename ImplementationPolicy::Field>* > out;
|
||||
for (auto &i : ObservablesList){
|
||||
out.push_back(i->getPtr());
|
||||
}
|
||||
|
||||
// Add the checkpointer to the observables
|
||||
out.push_back(GetCheckPointer());
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private:
|
||||
// this private
|
||||
template <class ReaderClass >
|
||||
void fill_ActionsLevel(ReaderClass &Read){
|
||||
// Actions set
|
||||
int m;
|
||||
Read.readDefault("multiplier",m);
|
||||
multipliers.push_back(m);
|
||||
std::cout << "Level : " << multipliers.size() << " with multiplier : " << m << std::endl;
|
||||
// here gauge
|
||||
Read.push("Action");
|
||||
do{
|
||||
auto &ActionFactory = HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, ReaderClass>::getInstance();
|
||||
std::string action_type;
|
||||
Read.readDefault("name", action_type);
|
||||
output_vector_string(ActionFactory.getBuilderList() );
|
||||
ActionsList.emplace(m, ActionFactory.create(action_type, Read));
|
||||
} while (Read.nextElement("Action"));
|
||||
ActionsList.find(m)->second->print_parameters();
|
||||
Read.pop();
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HMC_RESOURCE_MANAGER_H
|
@ -1,137 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/GenericHmcRunner.h
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef HMC_RUNNER_MODULE
|
||||
#define HMC_RUNNER_MODULE
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// the reader class is necessary here for the automatic initialization of the resources
|
||||
// if we had a virtual reader would have been unecessary
|
||||
template <class HMCType, class ReaderClass >
|
||||
class HMCModule
|
||||
: public Parametrized< QCD::HMCparameters >,
|
||||
public HMCModuleBase< QCD::HMCRunnerBase<ReaderClass> > {
|
||||
public:
|
||||
typedef HMCModuleBase< QCD::HMCRunnerBase<ReaderClass> > Base;
|
||||
typedef typename Base::Product Product;
|
||||
|
||||
std::unique_ptr<HMCType> HMCPtr;
|
||||
|
||||
HMCModule(QCD::HMCparameters Par) : Parametrized<QCD::HMCparameters>(Par) {}
|
||||
|
||||
template <class ReaderCl>
|
||||
HMCModule(Reader<ReaderCl>& R) : Parametrized<QCD::HMCparameters>(R, "HMC"){};
|
||||
|
||||
Product* getPtr() {
|
||||
if (!HMCPtr) initialize();
|
||||
|
||||
return HMCPtr.get();
|
||||
}
|
||||
|
||||
private:
|
||||
virtual void initialize() = 0;
|
||||
};
|
||||
|
||||
// Factory
|
||||
template <char const *str, class ReaderClass >
|
||||
class HMCRunnerModuleFactory
|
||||
: public Factory < HMCModuleBase< QCD::HMCRunnerBase<ReaderClass> > , Reader<ReaderClass> > {
|
||||
public:
|
||||
typedef Reader<ReaderClass> TheReader;
|
||||
// use SINGLETON FUNCTOR MACRO HERE
|
||||
HMCRunnerModuleFactory(const HMCRunnerModuleFactory& e) = delete;
|
||||
void operator=(const HMCRunnerModuleFactory& e) = delete;
|
||||
static HMCRunnerModuleFactory& getInstance(void) {
|
||||
static HMCRunnerModuleFactory e;
|
||||
return e;
|
||||
}
|
||||
|
||||
private:
|
||||
HMCRunnerModuleFactory(void) = default;
|
||||
std::string obj_type() const {
|
||||
return std::string(str);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////
|
||||
// macro for these
|
||||
|
||||
template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass >
|
||||
class HMCLeapFrog: public HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::LeapFrog>, ReaderClass >{
|
||||
typedef HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::LeapFrog>, ReaderClass > HMCBaseMod;
|
||||
using HMCBaseMod::HMCBaseMod;
|
||||
|
||||
// aquire resource
|
||||
virtual void initialize(){
|
||||
this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::LeapFrog>(this->Par_) );
|
||||
}
|
||||
};
|
||||
|
||||
template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass >
|
||||
class HMCMinimumNorm2: public HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::MinimumNorm2>, ReaderClass >{
|
||||
typedef HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::MinimumNorm2>, ReaderClass > HMCBaseMod;
|
||||
using HMCBaseMod::HMCBaseMod;
|
||||
|
||||
// aquire resource
|
||||
virtual void initialize(){
|
||||
this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::MinimumNorm2>(this->Par_));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass >
|
||||
class HMCForceGradient: public HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::ForceGradient>, ReaderClass >{
|
||||
typedef HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::ForceGradient>, ReaderClass > HMCBaseMod;
|
||||
using HMCBaseMod::HMCBaseMod;
|
||||
|
||||
// aquire resource
|
||||
virtual void initialize(){
|
||||
this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::ForceGradient>(this->Par_) );
|
||||
}
|
||||
};
|
||||
|
||||
extern char hmc_string[];
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,177 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/HMC_GridModules.h
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef HMC_GRID_MODULES
|
||||
#define HMC_GRID_MODULES
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Resources
|
||||
// Modules for grids
|
||||
|
||||
// Introduce another namespace HMCModules?
|
||||
|
||||
class GridModuleParameters: Serializable{
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(GridModuleParameters,
|
||||
std::string, lattice,
|
||||
std::string, mpi);
|
||||
|
||||
std::vector<int> getLattice() const {return strToVec<int>(lattice);}
|
||||
std::vector<int> getMpi() const {return strToVec<int>(mpi);}
|
||||
|
||||
|
||||
void check() const {
|
||||
if (getLattice().size() != getMpi().size() ) {
|
||||
std::cout << GridLogError
|
||||
<< "Error in GridModuleParameters: lattice and mpi dimensions "
|
||||
"do not match"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
template <class ReaderClass>
|
||||
GridModuleParameters(Reader<ReaderClass>& Reader, std::string n = "LatticeGrid"):name(n) {
|
||||
read(Reader, name, *this);
|
||||
check();
|
||||
}
|
||||
|
||||
// Save on file
|
||||
template< class WriterClass>
|
||||
void save(Writer<WriterClass>& Writer){
|
||||
check();
|
||||
write(Writer, name, *this);
|
||||
}
|
||||
private:
|
||||
std::string name;
|
||||
};
|
||||
|
||||
// Lower level class
|
||||
class GridModule {
|
||||
public:
|
||||
GridCartesian* get_full() {
|
||||
std::cout << GridLogDebug << "Getting cartesian in module"<< std::endl;
|
||||
return grid_.get(); }
|
||||
GridRedBlackCartesian* get_rb() {
|
||||
std::cout << GridLogDebug << "Getting rb-cartesian in module"<< std::endl;
|
||||
return rbgrid_.get(); }
|
||||
|
||||
void set_full(GridCartesian* grid) { grid_.reset(grid); }
|
||||
void set_rb(GridRedBlackCartesian* rbgrid) { rbgrid_.reset(rbgrid); }
|
||||
void show_full_decomposition(){ grid_->show_decomposition(); }
|
||||
void show_rb_decomposition(){ rbgrid_->show_decomposition(); }
|
||||
|
||||
protected:
|
||||
std::unique_ptr<GridCartesian> grid_;
|
||||
std::unique_ptr<GridRedBlackCartesian> rbgrid_;
|
||||
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
// Classes for the user
|
||||
////////////////////////////////////
|
||||
// Note: the space time grid should be out of the QCD namespace
|
||||
template <class vector_type>
|
||||
class GridFourDimModule : public GridModule
|
||||
{
|
||||
public:
|
||||
GridFourDimModule()
|
||||
{
|
||||
using namespace QCD;
|
||||
set_full(SpaceTimeGrid::makeFourDimGrid(
|
||||
GridDefaultLatt(),
|
||||
GridDefaultSimd(4, vector_type::Nsimd()),
|
||||
GridDefaultMpi()));
|
||||
set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get()));
|
||||
}
|
||||
|
||||
GridFourDimModule(const std::vector<int> tweak_simd)
|
||||
{
|
||||
using namespace QCD;
|
||||
if (tweak_simd.size() != 4)
|
||||
{
|
||||
std::cout << GridLogError
|
||||
<< "Error in GridFourDimModule: SIMD size different from 4"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Checks that the product agrees with the expectation
|
||||
int simd_sum = 1;
|
||||
for (auto &n : tweak_simd)
|
||||
simd_sum *= n;
|
||||
std::cout << GridLogDebug << "TweakSIMD: " << tweak_simd << " Sum: " << simd_sum << std::endl;
|
||||
|
||||
if (simd_sum == vector_type::Nsimd())
|
||||
{
|
||||
set_full(SpaceTimeGrid::makeFourDimGrid(
|
||||
GridDefaultLatt(),
|
||||
tweak_simd,
|
||||
GridDefaultMpi()));
|
||||
set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get()));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << GridLogError
|
||||
<< "Error in GridFourDimModule: SIMD lanes must sum to "
|
||||
<< vector_type::Nsimd()
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
GridFourDimModule(const GridModuleParameters Params)
|
||||
{
|
||||
using namespace QCD;
|
||||
std::vector<int> lattice_v = Params.getLattice();
|
||||
std::vector<int> mpi_v = Params.getMpi();
|
||||
if (lattice_v.size() == 4)
|
||||
{
|
||||
set_full(SpaceTimeGrid::makeFourDimGrid(
|
||||
lattice_v,
|
||||
GridDefaultSimd(4, vector_type::Nsimd()),
|
||||
mpi_v));
|
||||
set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get()));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << GridLogError
|
||||
<< "Error in GridFourDimModule: lattice dimension different from 4"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
typedef GridFourDimModule<vComplex> GridDefaultFourDimModule;
|
||||
|
||||
|
||||
} // namespace Grid
|
||||
|
||||
#endif // HMC_GRID_MODULES
|
@ -1,107 +0,0 @@
|
||||
Using HMC in Grid version 0.5.1
|
||||
|
||||
These are the instructions to use the Generalised HMC on Grid version 0.5.1.
|
||||
Disclaimer: GRID is still under active development so any information here can be changed in future releases.
|
||||
|
||||
|
||||
Command line options
|
||||
===================
|
||||
(relevant file GenericHMCrunner.h)
|
||||
The initial configuration can be changed at the command line using
|
||||
--StartType <your choice>
|
||||
valid choices, one among these
|
||||
HotStart, ColdStart, TepidStart, CheckpointStart
|
||||
default: HotStart
|
||||
|
||||
example
|
||||
./My_hmc_exec --StartType HotStart
|
||||
|
||||
The CheckpointStart option uses the prefix for the configurations and rng seed files defined in your executable and the initial configuration is specified by
|
||||
--StartTrajectory <integer>
|
||||
default: 0
|
||||
|
||||
The number of trajectories for a specific run are specified at command line by
|
||||
--Trajectories <integer>
|
||||
default: 1
|
||||
|
||||
The number of thermalization steps (i.e. steps when the Metropolis acceptance check is turned off) is specified by
|
||||
--Thermalizations <integer>
|
||||
default: 10
|
||||
|
||||
|
||||
Any other parameter is defined in the source for the executable.
|
||||
|
||||
HMC controls
|
||||
===========
|
||||
|
||||
The lines
|
||||
|
||||
std::vector<int> SerSeed({1, 2, 3, 4, 5});
|
||||
std::vector<int> ParSeed({6, 7, 8, 9, 10});
|
||||
|
||||
define the seeds for the serial and the parallel RNG.
|
||||
|
||||
The line
|
||||
|
||||
TheHMC.MDparameters.set(20, 1.0);// MDsteps, traj length
|
||||
|
||||
declares the number of molecular dynamics steps and the total trajectory length.
|
||||
|
||||
|
||||
Actions
|
||||
======
|
||||
|
||||
Action names are defined in the file
|
||||
lib/qcd/Actions.h
|
||||
|
||||
Gauge actions list:
|
||||
|
||||
WilsonGaugeActionR;
|
||||
WilsonGaugeActionF;
|
||||
WilsonGaugeActionD;
|
||||
PlaqPlusRectangleActionR;
|
||||
PlaqPlusRectangleActionF;
|
||||
PlaqPlusRectangleActionD;
|
||||
IwasakiGaugeActionR;
|
||||
IwasakiGaugeActionF;
|
||||
IwasakiGaugeActionD;
|
||||
SymanzikGaugeActionR;
|
||||
SymanzikGaugeActionF;
|
||||
SymanzikGaugeActionD;
|
||||
|
||||
|
||||
ConjugateWilsonGaugeActionR;
|
||||
ConjugateWilsonGaugeActionF;
|
||||
ConjugateWilsonGaugeActionD;
|
||||
ConjugatePlaqPlusRectangleActionR;
|
||||
ConjugatePlaqPlusRectangleActionF;
|
||||
ConjugatePlaqPlusRectangleActionD;
|
||||
ConjugateIwasakiGaugeActionR;
|
||||
ConjugateIwasakiGaugeActionF;
|
||||
ConjugateIwasakiGaugeActionD;
|
||||
ConjugateSymanzikGaugeActionR;
|
||||
ConjugateSymanzikGaugeActionF;
|
||||
ConjugateSymanzikGaugeActionD;
|
||||
|
||||
|
||||
ScalarActionR;
|
||||
ScalarActionF;
|
||||
ScalarActionD;
|
||||
|
||||
|
||||
each of these action accept one single parameter at creation time (beta).
|
||||
Example for creating a Symanzik action with beta=4.0
|
||||
|
||||
SymanzikGaugeActionR(4.0)
|
||||
|
||||
The suffixes R,F,D in the action names refer to the Real
|
||||
(the precision is defined at compile time by the --enable-precision flag in the configure),
|
||||
Float and Double, that force the precision of the action to be 32, 64 bit respectively.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,97 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/BaseCheckpointer.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef BASE_CHECKPOINTER
|
||||
#define BASE_CHECKPOINTER
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
class CheckpointerParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters,
|
||||
std::string, config_prefix,
|
||||
std::string, rng_prefix,
|
||||
int, saveInterval,
|
||||
std::string, format, );
|
||||
|
||||
CheckpointerParameters(std::string cf = "cfg", std::string rn = "rng",
|
||||
int savemodulo = 1, const std::string &f = "IEEE64BIG")
|
||||
: config_prefix(cf),
|
||||
rng_prefix(rn),
|
||||
saveInterval(savemodulo),
|
||||
format(f){};
|
||||
|
||||
|
||||
template <class ReaderClass >
|
||||
CheckpointerParameters(Reader<ReaderClass> &Reader) {
|
||||
read(Reader, "Checkpointer", *this);
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Base class for checkpointers
|
||||
template <class Impl>
|
||||
class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
|
||||
public:
|
||||
void build_filenames(int traj, CheckpointerParameters &Params,
|
||||
std::string &conf_file, std::string &rng_file) {
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << Params.rng_prefix << "." << traj;
|
||||
rng_file = os.str();
|
||||
}
|
||||
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << Params.config_prefix << "." << traj;
|
||||
conf_file = os.str();
|
||||
}
|
||||
}
|
||||
|
||||
void check_filename(const std::string &filename){
|
||||
std::ifstream f(filename.c_str());
|
||||
if(!f.good()){
|
||||
std::cout << GridLogError << "Filename " << filename << " not found. Aborting. " << std::endl;
|
||||
abort();
|
||||
};
|
||||
}
|
||||
|
||||
virtual void initialize(const CheckpointerParameters &Params) = 0;
|
||||
|
||||
virtual void CheckpointRestore(int traj, typename Impl::Field &U,
|
||||
GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) = 0;
|
||||
|
||||
}; // class BaseHmcCheckpointer
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
}
|
||||
}
|
||||
#endif
|
@ -1,116 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/BinaryCheckpointer.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef BINARY_CHECKPOINTER
|
||||
#define BINARY_CHECKPOINTER
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// Simple checkpointer, only binary file
|
||||
template <class Impl>
|
||||
class BinaryHmcCheckpointer : public BaseHmcCheckpointer<Impl> {
|
||||
private:
|
||||
CheckpointerParameters Params;
|
||||
|
||||
public:
|
||||
INHERIT_FIELD_TYPES(Impl); // Gets the Field type, a Lattice object
|
||||
|
||||
// Extract types from the Field
|
||||
typedef typename Field::vector_object vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
|
||||
typedef typename sobj::DoublePrecision sobj_double;
|
||||
|
||||
BinaryHmcCheckpointer(const CheckpointerParameters &Params_) {
|
||||
initialize(Params_);
|
||||
}
|
||||
|
||||
void initialize(const CheckpointerParameters &Params_) { Params = Params_; }
|
||||
|
||||
void truncate(std::string file) {
|
||||
std::ofstream fout(file, std::ios::out);
|
||||
fout.close();
|
||||
}
|
||||
|
||||
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
|
||||
|
||||
if ((traj % Params.saveInterval) == 0) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
|
||||
uint32_t nersc_csum;
|
||||
uint32_t scidac_csuma;
|
||||
uint32_t scidac_csumb;
|
||||
|
||||
BinarySimpleUnmunger<sobj_double, sobj> munge;
|
||||
truncate(rng);
|
||||
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
truncate(config);
|
||||
|
||||
BinaryIO::writeLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
std::cout << GridLogMessage << "Written Binary Configuration " << config
|
||||
<< " checksum " << std::hex
|
||||
<< nersc_csum <<"/"
|
||||
<< scidac_csuma <<"/"
|
||||
<< scidac_csumb
|
||||
<< std::dec << std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
this->check_filename(rng);
|
||||
this->check_filename(config);
|
||||
|
||||
|
||||
BinarySimpleMunger<sobj_double, sobj> munge;
|
||||
|
||||
uint32_t nersc_csum;
|
||||
uint32_t scidac_csuma;
|
||||
uint32_t scidac_csumb;
|
||||
BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
BinaryIO::readLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
std::cout << GridLogMessage << "Read Binary Configuration " << config
|
||||
<< " checksums " << std::hex << nersc_csum<<"/"<<scidac_csuma<<"/"<<scidac_csumb
|
||||
<< std::dec << std::endl;
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
#endif
|
@ -1,172 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef CP_MODULES_H
|
||||
#define CP_MODULES_H
|
||||
|
||||
|
||||
// FIXME Reorganize QCD namespace
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Checkpoint module, owns the Checkpointer
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <class ImplementationPolicy>
|
||||
class CheckPointerModule: public Parametrized<QCD::CheckpointerParameters>, public HMCModuleBase< QCD::BaseHmcCheckpointer<ImplementationPolicy> > {
|
||||
public:
|
||||
std::unique_ptr<QCD::BaseHmcCheckpointer<ImplementationPolicy> > CheckPointPtr;
|
||||
typedef QCD::CheckpointerParameters APar;
|
||||
typedef HMCModuleBase< QCD::BaseHmcCheckpointer<ImplementationPolicy> > Base;
|
||||
typedef typename Base::Product Product;
|
||||
|
||||
CheckPointerModule(APar Par): Parametrized<APar>(Par) {}
|
||||
template <class ReaderClass>
|
||||
CheckPointerModule(Reader<ReaderClass>& Reader) : Parametrized<APar>(Reader){};
|
||||
|
||||
virtual void print_parameters(){
|
||||
std::cout << this->Par_ << std::endl;
|
||||
}
|
||||
|
||||
Product* getPtr() {
|
||||
if (!CheckPointPtr) initialize();
|
||||
|
||||
return CheckPointPtr.get();
|
||||
}
|
||||
|
||||
private:
|
||||
virtual void initialize() = 0;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
template <char const *str, class ImplementationPolicy, class ReaderClass >
|
||||
class HMC_CPModuleFactory
|
||||
: public Factory < HMCModuleBase< QCD::BaseHmcCheckpointer<ImplementationPolicy> > , Reader<ReaderClass> > {
|
||||
public:
|
||||
typedef Reader<ReaderClass> TheReader;
|
||||
// use SINGLETON FUNCTOR MACRO HERE
|
||||
HMC_CPModuleFactory(const HMC_CPModuleFactory& e) = delete;
|
||||
void operator=(const HMC_CPModuleFactory& e) = delete;
|
||||
static HMC_CPModuleFactory& getInstance(void) {
|
||||
static HMC_CPModuleFactory e;
|
||||
return e;
|
||||
}
|
||||
|
||||
private:
|
||||
HMC_CPModuleFactory(void) = default;
|
||||
std::string obj_type() const {
|
||||
return std::string(str);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Concrete classes
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
namespace QCD{
|
||||
|
||||
template<class ImplementationPolicy>
|
||||
class BinaryCPModule: public CheckPointerModule< ImplementationPolicy> {
|
||||
typedef CheckPointerModule< ImplementationPolicy> CPBase;
|
||||
using CPBase::CPBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->CheckPointPtr.reset(new BinaryHmcCheckpointer<ImplementationPolicy>(this->Par_));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<class ImplementationPolicy>
|
||||
class NerscCPModule: public CheckPointerModule< ImplementationPolicy> {
|
||||
typedef CheckPointerModule< ImplementationPolicy> CPBase;
|
||||
using CPBase::CPBase; // for constructors inheritance
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->CheckPointPtr.reset(new NerscHmcCheckpointer<ImplementationPolicy>(this->Par_));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
|
||||
template<class ImplementationPolicy>
|
||||
class ILDGCPModule: public CheckPointerModule< ImplementationPolicy> {
|
||||
typedef CheckPointerModule< ImplementationPolicy> CPBase;
|
||||
using CPBase::CPBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->CheckPointPtr.reset(new ILDGHmcCheckpointer<ImplementationPolicy>(this->Par_));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<class ImplementationPolicy, class Metadata>
|
||||
class ScidacCPModule: public CheckPointerModule< ImplementationPolicy> {
|
||||
typedef CheckPointerModule< ImplementationPolicy> CPBase;
|
||||
Metadata M;
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->CheckPointPtr.reset(new ScidacHmcCheckpointer<ImplementationPolicy, Metadata>(this->Par_, M));
|
||||
}
|
||||
public:
|
||||
ScidacCPModule(typename CPBase::APar Par, Metadata M_):M(M_), CPBase(Par) {}
|
||||
template <class ReaderClass>
|
||||
ScidacCPModule(Reader<ReaderClass>& Reader) : Parametrized<typename CPBase::APar>(Reader), M(Reader){};
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
}// QCD temporarily here
|
||||
|
||||
|
||||
extern char cp_string[];
|
||||
|
||||
/*
|
||||
// use macros?
|
||||
static Registrar<QCD::BinaryCPModule<QCD::PeriodicGimplR>, HMC_CPModuleFactory<cp_string, QCD::PeriodicGimplR, XmlReader> > __CPBinarymodXMLInit("Binary");
|
||||
static Registrar<QCD::NerscCPModule<QCD::PeriodicGimplR> , HMC_CPModuleFactory<cp_string, QCD::PeriodicGimplR, XmlReader> > __CPNerscmodXMLInit("Nersc");
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
static Registrar<QCD::ILDGCPModule<QCD::PeriodicGimplR> , HMC_CPModuleFactory<cp_string, QCD::PeriodicGimplR, XmlReader> > __CPILDGmodXMLInit("ILDG");
|
||||
#endif
|
||||
*/
|
||||
|
||||
}// Grid
|
||||
#endif //CP_MODULES_H
|
@ -1,41 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef CHECKPOINTERS_H
|
||||
#define CHECKPOINTERS_H
|
||||
|
||||
#include <Grid/qcd/hmc/checkpointers/BaseCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/checkpointers/NerscCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h>
|
||||
//#include <Grid/qcd/hmc/checkpointers/CheckPointerModules.h>
|
||||
|
||||
|
||||
#endif // CHECKPOINTERS_H
|
@ -1,124 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/ILDGCheckpointer.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef ILDG_CHECKPOINTER
|
||||
#define ILDG_CHECKPOINTER
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// Only for Gauge fields
|
||||
template <class Implementation>
|
||||
class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
|
||||
private:
|
||||
CheckpointerParameters Params;
|
||||
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Implementation);
|
||||
|
||||
ILDGHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
|
||||
|
||||
void initialize(const CheckpointerParameters &Params_) {
|
||||
Params = Params_;
|
||||
|
||||
// check here that the format is valid
|
||||
int ieee32big = (Params.format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (Params.format == std::string("IEEE32"));
|
||||
int ieee64big = (Params.format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (Params.format == std::string("IEEE64"));
|
||||
|
||||
if (!(ieee64big || ieee32 || ieee32big || ieee64)) {
|
||||
std::cout << GridLogError << "Unrecognized file format " << Params.format
|
||||
<< std::endl;
|
||||
std::cout << GridLogError
|
||||
<< "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
|
||||
<< std::endl;
|
||||
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
if ((traj % Params.saveInterval) == 0) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
GridBase *grid = U._grid;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
IldgWriter _IldgWriter(grid->IsBoss());
|
||||
_IldgWriter.open(config);
|
||||
_IldgWriter.writeConfiguration(U, traj, config, config);
|
||||
_IldgWriter.close();
|
||||
|
||||
std::cout << GridLogMessage << "Written ILDG Configuration on " << config
|
||||
<< " checksum " << std::hex
|
||||
<< nersc_csum<<"/"
|
||||
<< scidac_csuma<<"/"
|
||||
<< scidac_csumb
|
||||
<< std::dec << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
this->check_filename(rng);
|
||||
this->check_filename(config);
|
||||
|
||||
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
FieldMetaData header;
|
||||
IldgReader _IldgReader;
|
||||
_IldgReader.open(config);
|
||||
_IldgReader.readConfiguration(U,header); // format from the header
|
||||
_IldgReader.close();
|
||||
|
||||
std::cout << GridLogMessage << "Read ILDG Configuration from " << config
|
||||
<< " checksum " << std::hex
|
||||
<< nersc_csum<<"/"
|
||||
<< scidac_csuma<<"/"
|
||||
<< scidac_csumb
|
||||
<< std::dec << std::endl;
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HAVE_LIME
|
||||
#endif // ILDG_CHECKPOINTER
|
@ -1,83 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/NerscCheckpointer.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef NERSC_CHECKPOINTER
|
||||
#define NERSC_CHECKPOINTER
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// Only for Gauge fields
|
||||
template <class Gimpl>
|
||||
class NerscHmcCheckpointer : public BaseHmcCheckpointer<Gimpl> {
|
||||
private:
|
||||
CheckpointerParameters Params;
|
||||
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl); // only for gauge configurations
|
||||
|
||||
NerscHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
|
||||
|
||||
void initialize(const CheckpointerParameters &Params_) {
|
||||
Params = Params_;
|
||||
Params.format = "IEEE64BIG"; // fixed, overwrite any other choice
|
||||
}
|
||||
|
||||
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
if ((traj % Params.saveInterval) == 0) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
|
||||
int precision32 = 1;
|
||||
int tworow = 0;
|
||||
NerscIO::writeRNGState(sRNG, pRNG, rng);
|
||||
NerscIO::writeConfiguration(U, config, tworow, precision32);
|
||||
}
|
||||
};
|
||||
|
||||
void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
this->check_filename(rng);
|
||||
this->check_filename(config);
|
||||
|
||||
|
||||
FieldMetaData header;
|
||||
NerscIO::readRNGState(sRNG, pRNG, header, rng);
|
||||
NerscIO::readConfiguration(U, header, config);
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
#endif
|
@ -1,122 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/ScidacCheckpointer.h
|
||||
|
||||
Copyright (C) 2018
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef SCIDAC_CHECKPOINTER
|
||||
#define SCIDAC_CHECKPOINTER
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// For generic fields
|
||||
template <class Implementation, class Metadata>
|
||||
class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
|
||||
private:
|
||||
CheckpointerParameters Params;
|
||||
Metadata MData;
|
||||
|
||||
typedef typename Implementation::Field Field;
|
||||
|
||||
public:
|
||||
//INHERIT_GIMPL_TYPES(Implementation);
|
||||
|
||||
ScidacHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
|
||||
ScidacHmcCheckpointer(const CheckpointerParameters &Params_, const Metadata& M_):MData(M_) { initialize(Params_); }
|
||||
|
||||
void initialize(const CheckpointerParameters &Params_) {
|
||||
Params = Params_;
|
||||
|
||||
// check here that the format is valid
|
||||
int ieee32big = (Params.format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (Params.format == std::string("IEEE32"));
|
||||
int ieee64big = (Params.format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (Params.format == std::string("IEEE64"));
|
||||
|
||||
if (!(ieee64big || ieee32 || ieee32big || ieee64)) {
|
||||
std::cout << GridLogError << "Unrecognized file format " << Params.format
|
||||
<< std::endl;
|
||||
std::cout << GridLogError
|
||||
<< "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
|
||||
<< std::endl;
|
||||
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
if ((traj % Params.saveInterval) == 0) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
GridBase *grid = U._grid;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
ScidacWriter _ScidacWriter(grid->IsBoss());
|
||||
_ScidacWriter.open(config);
|
||||
_ScidacWriter.writeScidacFieldRecord(U, MData);
|
||||
_ScidacWriter.close();
|
||||
|
||||
std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
std::string config, rng;
|
||||
this->build_filenames(traj, Params, config, rng);
|
||||
this->check_filename(rng);
|
||||
this->check_filename(config);
|
||||
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
Metadata md_content;
|
||||
ScidacReader _ScidacReader;
|
||||
_ScidacReader.open(config);
|
||||
_ScidacReader.readScidacFieldRecord(U,md_content); // format from the header
|
||||
_ScidacReader.close();
|
||||
|
||||
std::cout << GridLogMessage << "Read Scidac Configuration from " << config
|
||||
<< " checksum " << std::hex
|
||||
<< nersc_csum<<"/"
|
||||
<< scidac_csuma<<"/"
|
||||
<< scidac_csumb
|
||||
<< std::dec << std::endl;
|
||||
};
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HAVE_LIME
|
||||
#endif // ILDG_CHECKPOINTER
|
@ -1,295 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/hmc/integrators/Integrator_algorithm.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
|
||||
/*! @file Integrator_algorithm.h
|
||||
* @brief Declaration of classes for the Molecular Dynamics algorithms
|
||||
*
|
||||
*/
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
#ifndef INTEGRATOR_ALG_INCLUDED
|
||||
#define INTEGRATOR_ALG_INCLUDED
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/* PAB:
|
||||
*
|
||||
* Recursive leapfrog; explanation of nested stepping
|
||||
*
|
||||
* Nested 1:4; units in dt for top level integrator
|
||||
*
|
||||
* CHROMA IroIro
|
||||
* 0 1 0
|
||||
* P 1/2 P 1/2
|
||||
* P 1/16 P1/16
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/16 P1/8
|
||||
* P 1 P 1
|
||||
* P 1/16 * skipped --- avoids revaluating force
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/16 P1/8
|
||||
* P 1 P 1
|
||||
* P 1/16 * skipped
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/16 * skipped
|
||||
* P 1 P 1
|
||||
* P 1/16 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/8 P1/8
|
||||
* U 1/8 U1/8
|
||||
* P 1/16 P1/16
|
||||
* P 1/2 P 1/2
|
||||
*/
|
||||
|
||||
template <class FieldImplementation, class SmearingPolicy,
|
||||
class RepresentationPolicy =
|
||||
Representations<FundamentalRepresentation> >
|
||||
class LeapFrog : public Integrator<FieldImplementation, SmearingPolicy,
|
||||
RepresentationPolicy> {
|
||||
public:
|
||||
typedef LeapFrog<FieldImplementation, SmearingPolicy, RepresentationPolicy>
|
||||
Algorithm;
|
||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||
|
||||
std::string integrator_name(){return "LeapFrog";}
|
||||
|
||||
LeapFrog(GridBase* grid, IntegratorParameters Par,
|
||||
ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm)
|
||||
: Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(
|
||||
grid, Par, Aset, Sm){};
|
||||
|
||||
void step(Field& U, int level, int _first, int _last) {
|
||||
int fl = this->as.size() - 1;
|
||||
// level : current level
|
||||
// fl : final level
|
||||
// eps : current step size
|
||||
|
||||
// Get current level step size
|
||||
RealD eps = this->Params.trajL/this->Params.MDsteps;
|
||||
for (int l = 0; l <= level; ++l) eps /= this->as[l].multiplier;
|
||||
|
||||
int multiplier = this->as[level].multiplier;
|
||||
for (int e = 0; e < multiplier; ++e) {
|
||||
int first_step = _first && (e == 0);
|
||||
int last_step = _last && (e == multiplier - 1);
|
||||
|
||||
if (first_step) { // initial half step
|
||||
this->update_P(U, level, eps / 2.0);
|
||||
}
|
||||
|
||||
if (level == fl) { // lowest level
|
||||
this->update_U(U, eps);
|
||||
} else { // recursive function call
|
||||
this->step(U, level + 1, first_step, last_step);
|
||||
}
|
||||
|
||||
int mm = last_step ? 1 : 2;
|
||||
this->update_P(U, level, mm * eps / 2.0);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <class FieldImplementation, class SmearingPolicy,
|
||||
class RepresentationPolicy =
|
||||
Representations<FundamentalRepresentation> >
|
||||
class MinimumNorm2 : public Integrator<FieldImplementation, SmearingPolicy,
|
||||
RepresentationPolicy> {
|
||||
private:
|
||||
const RealD lambda = 0.1931833275037836;
|
||||
|
||||
public:
|
||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||
|
||||
MinimumNorm2(GridBase* grid, IntegratorParameters Par,
|
||||
ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm)
|
||||
: Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(
|
||||
grid, Par, Aset, Sm){};
|
||||
|
||||
std::string integrator_name(){return "MininumNorm2";}
|
||||
|
||||
void step(Field& U, int level, int _first, int _last) {
|
||||
// level : current level
|
||||
// fl : final level
|
||||
// eps : current step size
|
||||
|
||||
int fl = this->as.size() - 1;
|
||||
|
||||
RealD eps = this->Params.trajL/this->Params.MDsteps * 2.0;
|
||||
for (int l = 0; l <= level; ++l) eps /= 2.0 * this->as[l].multiplier;
|
||||
|
||||
// Nesting: 2xupdate_U of size eps/2
|
||||
// Next level is eps/2/multiplier
|
||||
|
||||
int multiplier = this->as[level].multiplier;
|
||||
for (int e = 0; e < multiplier; ++e) { // steps per step
|
||||
|
||||
int first_step = _first && (e == 0);
|
||||
int last_step = _last && (e == multiplier - 1);
|
||||
|
||||
if (first_step) { // initial half step
|
||||
this->update_P(U, level, lambda * eps);
|
||||
}
|
||||
|
||||
if (level == fl) { // lowest level
|
||||
this->update_U(U, 0.5 * eps);
|
||||
} else { // recursive function call
|
||||
this->step(U, level + 1, first_step, 0);
|
||||
}
|
||||
|
||||
this->update_P(U, level, (1.0 - 2.0 * lambda) * eps);
|
||||
|
||||
if (level == fl) { // lowest level
|
||||
this->update_U(U, 0.5 * eps);
|
||||
} else { // recursive function call
|
||||
this->step(U, level + 1, 0, last_step);
|
||||
}
|
||||
|
||||
int mm = (last_step) ? 1 : 2;
|
||||
this->update_P(U, level, lambda * eps * mm);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <class FieldImplementation, class SmearingPolicy,
|
||||
class RepresentationPolicy =
|
||||
Representations<FundamentalRepresentation> >
|
||||
class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
|
||||
RepresentationPolicy> {
|
||||
private:
|
||||
const RealD lambda = 1.0 / 6.0;
|
||||
;
|
||||
const RealD chi = 1.0 / 72.0;
|
||||
const RealD xi = 0.0;
|
||||
const RealD theta = 0.0;
|
||||
|
||||
public:
|
||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||
|
||||
// Looks like dH scales as dt^4. tested wilson/wilson 2 level.
|
||||
ForceGradient(GridBase* grid, IntegratorParameters Par,
|
||||
ActionSet<Field, RepresentationPolicy>& Aset,
|
||||
SmearingPolicy& Sm)
|
||||
: Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(
|
||||
grid, Par, Aset, Sm){};
|
||||
|
||||
std::string integrator_name(){return "ForceGradient";}
|
||||
|
||||
void FG_update_P(Field& U, int level, double fg_dt, double ep) {
|
||||
Field Ufg(U._grid);
|
||||
Field Pfg(U._grid);
|
||||
Ufg = U;
|
||||
Pfg = zero;
|
||||
std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep
|
||||
<< std::endl;
|
||||
// prepare_fg; no prediction/result cache for now
|
||||
// could relax CG stopping conditions for the
|
||||
// derivatives in the small step since the force gets multiplied by
|
||||
// a tiny dt^2 term relative to main force.
|
||||
//
|
||||
// Presently 4 force evals, and should have 3, so 1.33x too expensive.
|
||||
// could reduce this with sloppy CG to perhaps 1.15x too expensive
|
||||
// even without prediction.
|
||||
this->update_P(Pfg, Ufg, level, 1.0);
|
||||
this->update_U(Pfg, Ufg, fg_dt);
|
||||
this->update_P(Ufg, level, ep);
|
||||
}
|
||||
|
||||
void step(Field& U, int level, int _first, int _last) {
|
||||
RealD eps = this->Params.trajL/this->Params.MDsteps * 2.0;
|
||||
for (int l = 0; l <= level; ++l) eps /= 2.0 * this->as[l].multiplier;
|
||||
|
||||
RealD Chi = chi * eps * eps * eps;
|
||||
|
||||
int fl = this->as.size() - 1;
|
||||
|
||||
int multiplier = this->as[level].multiplier;
|
||||
|
||||
for (int e = 0; e < multiplier; ++e) { // steps per step
|
||||
|
||||
int first_step = _first && (e == 0);
|
||||
int last_step = _last && (e == multiplier - 1);
|
||||
|
||||
if (first_step) { // initial half step
|
||||
this->update_P(U, level, lambda * eps);
|
||||
}
|
||||
|
||||
if (level == fl) { // lowest level
|
||||
this->update_U(U, 0.5 * eps);
|
||||
} else { // recursive function call
|
||||
this->step(U, level + 1, first_step, 0);
|
||||
}
|
||||
|
||||
this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps),
|
||||
(1.0 - 2.0 * lambda) * eps);
|
||||
|
||||
if (level == fl) { // lowest level
|
||||
this->update_U(U, 0.5 * eps);
|
||||
} else { // recursive function call
|
||||
this->step(U, level + 1, 0, last_step);
|
||||
}
|
||||
|
||||
int mm = (last_step) ? 1 : 2;
|
||||
this->update_P(U, level, lambda * eps * mm);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // INTEGRATOR_INCLUDED
|
@ -1,517 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/ActionModules.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef ACTION_MODULES_H
|
||||
#define ACTION_MODULES_H
|
||||
|
||||
/*
|
||||
Define loadable, serializable modules
|
||||
for the HMC execution
|
||||
*/
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Actions
|
||||
//////////////////////////////////////////////
|
||||
|
||||
template <class Product, class R>
|
||||
class ActionModuleBase: public HMCModuleBase<Product>{
|
||||
public:
|
||||
typedef R Resource;
|
||||
virtual void acquireResource(R& ){};
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <class ActionType, class APar>
|
||||
class ActionModule
|
||||
: public Parametrized<APar>,
|
||||
public ActionModuleBase< QCD::Action<typename ActionType::GaugeField> , QCD::GridModule > {
|
||||
public:
|
||||
typedef ActionModuleBase< QCD::Action<typename ActionType::GaugeField>, QCD::GridModule > Base;
|
||||
typedef typename Base::Product Product;
|
||||
typedef APar Parameters;
|
||||
|
||||
std::unique_ptr<ActionType> ActionPtr;
|
||||
|
||||
ActionModule(APar Par) : Parametrized<APar>(Par) {}
|
||||
|
||||
template <class ReaderClass>
|
||||
ActionModule(Reader<ReaderClass>& Reader) : Parametrized<APar>(Reader){};
|
||||
|
||||
|
||||
virtual void print_parameters(){
|
||||
Parametrized<APar>::print_parameters();
|
||||
}
|
||||
|
||||
Product* getPtr() {
|
||||
if (!ActionPtr) initialize();
|
||||
|
||||
return ActionPtr.get();
|
||||
}
|
||||
|
||||
private:
|
||||
virtual void initialize() = 0;
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////
|
||||
// Modules
|
||||
//////////////////////////
|
||||
|
||||
namespace QCD{
|
||||
|
||||
class PlaqPlusRectangleGaugeActionParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(PlaqPlusRectangleGaugeActionParameters,
|
||||
RealD, c_plaq,
|
||||
RealD, c_rect);
|
||||
|
||||
};
|
||||
|
||||
class RBCGaugeActionParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(RBCGaugeActionParameters,
|
||||
RealD, beta,
|
||||
RealD, c1);
|
||||
|
||||
};
|
||||
|
||||
class BetaGaugeActionParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(BetaGaugeActionParameters,
|
||||
RealD, beta);
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
template <class Impl >
|
||||
class WilsonGModule: public ActionModule<WilsonGaugeAction<Impl>, BetaGaugeActionParameters> {
|
||||
typedef ActionModule<WilsonGaugeAction<Impl>, BetaGaugeActionParameters> ActionBase;
|
||||
using ActionBase::ActionBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ActionPtr.reset(new WilsonGaugeAction<Impl>(this->Par_.beta));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class Impl >
|
||||
class PlaqPlusRectangleGModule: public ActionModule<PlaqPlusRectangleAction<Impl>, PlaqPlusRectangleGaugeActionParameters> {
|
||||
typedef ActionModule<PlaqPlusRectangleAction<Impl>, PlaqPlusRectangleGaugeActionParameters> ActionBase;
|
||||
using ActionBase::ActionBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ActionPtr.reset(new PlaqPlusRectangleAction<Impl>(this->Par_.c_plaq, this->Par_.c_rect));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class Impl >
|
||||
class RBCGModule: public ActionModule<RBCGaugeAction<Impl>, RBCGaugeActionParameters> {
|
||||
typedef ActionModule<RBCGaugeAction<Impl>, RBCGaugeActionParameters> ActionBase;
|
||||
using ActionBase::ActionBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ActionPtr.reset(new RBCGaugeAction<Impl>(this->Par_.beta, this->Par_.c1));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
template <class Impl >
|
||||
class SymanzikGModule: public ActionModule<SymanzikGaugeAction<Impl>, BetaGaugeActionParameters> {
|
||||
typedef ActionModule<SymanzikGaugeAction<Impl>, BetaGaugeActionParameters> ActionBase;
|
||||
using ActionBase::ActionBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ActionPtr.reset(new SymanzikGaugeAction<Impl>(this->Par_.beta));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class Impl >
|
||||
class IwasakiGModule: public ActionModule<IwasakiGaugeAction<Impl>, BetaGaugeActionParameters> {
|
||||
typedef ActionModule<IwasakiGaugeAction<Impl>, BetaGaugeActionParameters> ActionBase;
|
||||
using ActionBase::ActionBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ActionPtr.reset(new IwasakiGaugeAction<Impl>(this->Par_.beta));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <class Impl >
|
||||
class DBW2GModule: public ActionModule<DBW2GaugeAction<Impl>, BetaGaugeActionParameters> {
|
||||
typedef ActionModule<DBW2GaugeAction<Impl>, BetaGaugeActionParameters> ActionBase;
|
||||
using ActionBase::ActionBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ActionPtr.reset(new DBW2GaugeAction<Impl>(this->Par_.beta));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/////////////////////////////////////////
|
||||
// Fermion Actions
|
||||
/////////////////////////////////////////
|
||||
|
||||
|
||||
template <class Impl, template <typename> class FermionA, class Params = NoParameters >
|
||||
class PseudoFermionModuleBase: public ActionModule<FermionA<Impl>, Params> {
|
||||
protected:
|
||||
typedef ActionModule<FermionA<Impl>, Params> ActionBase;
|
||||
using ActionBase::ActionBase; // for constructors
|
||||
|
||||
typedef std::unique_ptr<FermionOperatorModuleBase<FermionOperator<Impl>> > operator_type;
|
||||
typedef std::unique_ptr<HMCModuleBase<OperatorFunction<typename Impl::FermionField> > > solver_type;
|
||||
|
||||
template <class ReaderClass>
|
||||
void getFermionOperator(Reader<ReaderClass>& Reader, operator_type &fo, std::string section_name){
|
||||
auto &FOFactory = HMC_FermionOperatorModuleFactory<fermionop_string, Impl, ReaderClass>::getInstance();
|
||||
Reader.push(section_name);
|
||||
std::string op_name;
|
||||
read(Reader,"name", op_name);
|
||||
fo = FOFactory.create(op_name, Reader);
|
||||
Reader.pop();
|
||||
}
|
||||
|
||||
template <class ReaderClass>
|
||||
void getSolverOperator(Reader<ReaderClass>& Reader, solver_type &so, std::string section_name){
|
||||
auto& SolverFactory = HMC_SolverModuleFactory<solver_string, typename Impl::FermionField, ReaderClass>::getInstance();
|
||||
Reader.push(section_name);
|
||||
std::string solv_name;
|
||||
read(Reader,"name", solv_name);
|
||||
so = SolverFactory.create(solv_name, Reader);
|
||||
Reader.pop();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <class Impl >
|
||||
class TwoFlavourFModule: public PseudoFermionModuleBase<Impl, TwoFlavourPseudoFermionAction>{
|
||||
typedef PseudoFermionModuleBase<Impl, TwoFlavourPseudoFermionAction> Base;
|
||||
using Base::Base;
|
||||
|
||||
typename Base::operator_type fop_mod;
|
||||
typename Base::solver_type solver_mod;
|
||||
|
||||
public:
|
||||
virtual void acquireResource(typename Base::Resource& GridMod){
|
||||
fop_mod->AddGridPair(GridMod);
|
||||
}
|
||||
|
||||
// constructor
|
||||
template <class ReaderClass>
|
||||
TwoFlavourFModule(Reader<ReaderClass>& R): Base(R) {
|
||||
this->getSolverOperator(R, solver_mod, "Solver");
|
||||
this->getFermionOperator(R, fop_mod, "Operator");
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize() {
|
||||
// here temporarily assuming that the force and action solver are the same
|
||||
this->ActionPtr.reset(new TwoFlavourPseudoFermionAction<Impl>(*(this->fop_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr())));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// very similar, I could have templated this but it is overkilling
|
||||
template <class Impl >
|
||||
class TwoFlavourEOFModule: public PseudoFermionModuleBase<Impl, TwoFlavourEvenOddPseudoFermionAction>{
|
||||
typedef PseudoFermionModuleBase<Impl, TwoFlavourEvenOddPseudoFermionAction> Base;
|
||||
using Base::Base;
|
||||
|
||||
typename Base::operator_type fop_mod;
|
||||
typename Base::solver_type solver_mod;
|
||||
|
||||
public:
|
||||
virtual void acquireResource(typename Base::Resource& GridMod){
|
||||
fop_mod->AddGridPair(GridMod);
|
||||
}
|
||||
|
||||
// constructor
|
||||
template <class ReaderClass>
|
||||
TwoFlavourEOFModule(Reader<ReaderClass>& R): PseudoFermionModuleBase<Impl, TwoFlavourEvenOddPseudoFermionAction>(R) {
|
||||
this->getSolverOperator(R, solver_mod, "Solver");
|
||||
this->getFermionOperator(R, fop_mod, "Operator");
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize() {
|
||||
// here temporarily assuming that the force and action solver are the same
|
||||
this->ActionPtr.reset(new TwoFlavourEvenOddPseudoFermionAction<Impl>(*(this->fop_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr())));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <class Impl >
|
||||
class TwoFlavourRatioFModule: public PseudoFermionModuleBase<Impl, TwoFlavourRatioPseudoFermionAction>{
|
||||
typedef PseudoFermionModuleBase<Impl, TwoFlavourRatioPseudoFermionAction> Base;
|
||||
using Base::Base;
|
||||
|
||||
typename Base::operator_type fop_numerator_mod;
|
||||
typename Base::operator_type fop_denominator_mod;
|
||||
typename Base::solver_type solver_mod;
|
||||
|
||||
public:
|
||||
virtual void acquireResource(typename Base::Resource& GridMod){
|
||||
fop_numerator_mod->AddGridPair(GridMod);
|
||||
fop_denominator_mod->AddGridPair(GridMod);
|
||||
}
|
||||
|
||||
// constructor
|
||||
template <class ReaderClass>
|
||||
TwoFlavourRatioFModule(Reader<ReaderClass>& R): PseudoFermionModuleBase<Impl, TwoFlavourRatioPseudoFermionAction>(R) {
|
||||
this->getSolverOperator(R, solver_mod, "Solver");
|
||||
this->getFermionOperator(R, fop_numerator_mod, "Numerator");
|
||||
this->getFermionOperator(R, fop_denominator_mod, "Denominator");
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize() {
|
||||
// here temporarily assuming that the force and action solver are the same
|
||||
this->ActionPtr.reset(new TwoFlavourRatioPseudoFermionAction<Impl>(*(this->fop_numerator_mod->getPtr()),
|
||||
*(this->fop_denominator_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr())));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class Impl >
|
||||
class TwoFlavourRatioEOFModule: public PseudoFermionModuleBase<Impl, TwoFlavourEvenOddRatioPseudoFermionAction>{
|
||||
typedef PseudoFermionModuleBase<Impl, TwoFlavourEvenOddRatioPseudoFermionAction> Base;
|
||||
using Base::Base;
|
||||
|
||||
typename Base::operator_type fop_numerator_mod;
|
||||
typename Base::operator_type fop_denominator_mod;
|
||||
typename Base::solver_type solver_mod;
|
||||
|
||||
public:
|
||||
virtual void acquireResource(typename Base::Resource& GridMod){
|
||||
fop_numerator_mod->AddGridPair(GridMod);
|
||||
fop_denominator_mod->AddGridPair(GridMod);
|
||||
}
|
||||
|
||||
// constructor
|
||||
template <class ReaderClass>
|
||||
TwoFlavourRatioEOFModule(Reader<ReaderClass>& R): Base(R) {
|
||||
this->getSolverOperator(R, solver_mod, "Solver");
|
||||
this->getFermionOperator(R, fop_numerator_mod, "Numerator");
|
||||
this->getFermionOperator(R, fop_denominator_mod, "Denominator");
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize() {
|
||||
// here temporarily assuming that the force and action solver are the same
|
||||
this->ActionPtr.reset(new TwoFlavourEvenOddRatioPseudoFermionAction<Impl>(*(this->fop_numerator_mod->getPtr()),
|
||||
*(this->fop_denominator_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr())));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <class Impl >
|
||||
class OneFlavourFModule: public PseudoFermionModuleBase<Impl, OneFlavourRationalPseudoFermionAction, OneFlavourRationalParams>{
|
||||
typedef PseudoFermionModuleBase<Impl, OneFlavourRationalPseudoFermionAction, OneFlavourRationalParams> Base;
|
||||
using Base::Base;
|
||||
|
||||
typename Base::operator_type fop_mod;
|
||||
|
||||
public:
|
||||
virtual void acquireResource(typename Base::Resource& GridMod){
|
||||
fop_mod->AddGridPair(GridMod);
|
||||
}
|
||||
|
||||
// constructor
|
||||
template <class ReaderClass>
|
||||
OneFlavourFModule(Reader<ReaderClass>& R): Base(R) {
|
||||
this->getFermionOperator(R, fop_mod, "Operator");
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize() {
|
||||
this->ActionPtr.reset(new OneFlavourRationalPseudoFermionAction<Impl>(*(this->fop_mod->getPtr()), this->Par_ ));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class Impl >
|
||||
class OneFlavourEOFModule:
|
||||
public PseudoFermionModuleBase<Impl, OneFlavourEvenOddRationalPseudoFermionAction, OneFlavourRationalParams>
|
||||
{
|
||||
typedef PseudoFermionModuleBase<Impl, OneFlavourEvenOddRationalPseudoFermionAction, OneFlavourRationalParams> Base;
|
||||
using Base::Base;
|
||||
|
||||
typename Base::operator_type fop_mod;
|
||||
|
||||
public:
|
||||
virtual void acquireResource(typename Base::Resource& GridMod){
|
||||
fop_mod->AddGridPair(GridMod);
|
||||
}
|
||||
|
||||
// constructor
|
||||
template <class ReaderClass>
|
||||
OneFlavourEOFModule(Reader<ReaderClass>& R): Base(R) {
|
||||
this->getFermionOperator(R, fop_mod, "Operator");
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize() {
|
||||
this->ActionPtr.reset(new OneFlavourEvenOddRationalPseudoFermionAction<Impl>(*(this->fop_mod->getPtr()), this->Par_ ));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <class Impl >
|
||||
class OneFlavourRatioFModule:
|
||||
public PseudoFermionModuleBase<Impl, OneFlavourRatioRationalPseudoFermionAction, OneFlavourRationalParams>
|
||||
{
|
||||
|
||||
typedef PseudoFermionModuleBase<Impl, OneFlavourRatioRationalPseudoFermionAction, OneFlavourRationalParams> Base;
|
||||
using Base::Base;
|
||||
|
||||
typename Base::operator_type fop_numerator_mod;
|
||||
typename Base::operator_type fop_denominator_mod;
|
||||
|
||||
public:
|
||||
virtual void acquireResource(typename Base::Resource& GridMod){
|
||||
fop_numerator_mod->AddGridPair(GridMod);
|
||||
fop_denominator_mod->AddGridPair(GridMod);
|
||||
}
|
||||
|
||||
// constructor
|
||||
template <class ReaderClass>
|
||||
OneFlavourRatioFModule(Reader<ReaderClass>& R): Base(R) {
|
||||
this->getFermionOperator(R, fop_numerator_mod, "Numerator");
|
||||
this->getFermionOperator(R, fop_denominator_mod, "Denominator");
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize() {
|
||||
this->ActionPtr.reset(new OneFlavourRatioRationalPseudoFermionAction<Impl>( *(this->fop_numerator_mod->getPtr()),
|
||||
*(this->fop_denominator_mod->getPtr()),
|
||||
this->Par_ ));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <class Impl >
|
||||
class OneFlavourRatioEOFModule:
|
||||
public PseudoFermionModuleBase<Impl, OneFlavourEvenOddRatioRationalPseudoFermionAction, OneFlavourRationalParams>
|
||||
{
|
||||
|
||||
typedef PseudoFermionModuleBase<Impl, OneFlavourEvenOddRatioRationalPseudoFermionAction, OneFlavourRationalParams> Base;
|
||||
using Base::Base;
|
||||
|
||||
typename Base::operator_type fop_numerator_mod;
|
||||
typename Base::operator_type fop_denominator_mod;
|
||||
|
||||
public:
|
||||
virtual void acquireResource(typename Base::Resource& GridMod){
|
||||
fop_numerator_mod->AddGridPair(GridMod);
|
||||
fop_denominator_mod->AddGridPair(GridMod);
|
||||
}
|
||||
|
||||
// constructor
|
||||
template <class ReaderClass>
|
||||
OneFlavourRatioEOFModule(Reader<ReaderClass>& R): Base(R) {
|
||||
this->getFermionOperator(R, fop_numerator_mod, "Numerator");
|
||||
this->getFermionOperator(R, fop_denominator_mod, "Denominator");
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize() {
|
||||
this->ActionPtr.reset(new OneFlavourEvenOddRatioRationalPseudoFermionAction<Impl>(*(this->fop_numerator_mod->getPtr()),
|
||||
*(this->fop_denominator_mod->getPtr()),
|
||||
this->Par_ ));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}// QCD temporarily here
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////
|
||||
// Factories specialisations
|
||||
////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
// use the same classed defined by Antonin, does not make sense to rewrite
|
||||
// Factory is perfectly fine
|
||||
// Registar must be changed because I do not want to use the ModuleFactory
|
||||
|
||||
// explicit ref to LatticeGaugeField must be changed or put in the factory
|
||||
//typedef ActionModuleBase< QCD::Action< QCD::LatticeGaugeField >, QCD::GridModule > HMC_LGTActionModBase;
|
||||
//typedef ActionModuleBase< QCD::Action< QCD::LatticeReal >, QCD::GridModule > HMC_ScalarActionModBase;
|
||||
|
||||
template <char const *str, class Field, class ReaderClass >
|
||||
class HMC_ActionModuleFactory
|
||||
: public Factory < ActionModuleBase< QCD::Action< Field >, QCD::GridModule > , Reader<ReaderClass> > {
|
||||
public:
|
||||
typedef Reader<ReaderClass> TheReader;
|
||||
// use SINGLETON FUNCTOR MACRO HERE
|
||||
HMC_ActionModuleFactory(const HMC_ActionModuleFactory& e) = delete;
|
||||
void operator=(const HMC_ActionModuleFactory& e) = delete;
|
||||
static HMC_ActionModuleFactory& getInstance(void) {
|
||||
static HMC_ActionModuleFactory e;
|
||||
return e;
|
||||
}
|
||||
|
||||
private:
|
||||
HMC_ActionModuleFactory(void) = default;
|
||||
std::string obj_type() const {
|
||||
return std::string(str);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
extern char gauge_string[];
|
||||
} // Grid
|
||||
|
||||
|
||||
#endif //HMC_MODULES_H
|
@ -1,109 +0,0 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: Factory.h
|
||||
|
||||
Copyright (C) 2015
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef Factory_hpp_
|
||||
#define Factory_hpp_
|
||||
|
||||
|
||||
namespace Grid{
|
||||
|
||||
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
* abstract factory class *
|
||||
******************************************************************************/
|
||||
template <typename T, typename CreatorInput>
|
||||
class Factory
|
||||
{
|
||||
public:
|
||||
typedef std::function< std::unique_ptr<T> (const CreatorInput&) > Func;
|
||||
|
||||
// constructor
|
||||
Factory(void) = default;
|
||||
// destructor
|
||||
virtual ~Factory(void) = default;
|
||||
// registration
|
||||
void registerBuilder(const std::string type, const Func &f);
|
||||
// get builder list
|
||||
std::vector<std::string> getBuilderList(void) const;
|
||||
// factory
|
||||
std::unique_ptr<T> create(const std::string type,
|
||||
const CreatorInput& input) const;
|
||||
private:
|
||||
std::map<std::string, Func> builder_;
|
||||
virtual std::string obj_type() const = 0;
|
||||
};
|
||||
|
||||
/******************************************************************************
|
||||
* template implementation *
|
||||
******************************************************************************/
|
||||
// registration ////////////////////////////////////////////////////////////////
|
||||
template <typename T, typename CreatorInput>
|
||||
void Factory<T, CreatorInput>::registerBuilder(const std::string type, const Func &f)
|
||||
{
|
||||
builder_[type] = f;
|
||||
}
|
||||
|
||||
// get module list /////////////////////////////////////////////////////////////
|
||||
template <typename T, typename CreatorInput>
|
||||
std::vector<std::string> Factory<T, CreatorInput>::getBuilderList(void) const
|
||||
{
|
||||
std::vector<std::string> list;
|
||||
|
||||
for (auto &b: builder_)
|
||||
{
|
||||
list.push_back(b.first);
|
||||
}
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
// factory /////////////////////////////////////////////////////////////////////
|
||||
template <typename T, typename CreatorInput>
|
||||
std::unique_ptr<T> Factory<T, CreatorInput>::create(const std::string type,
|
||||
const CreatorInput& input) const
|
||||
{
|
||||
Func func;
|
||||
|
||||
std::cout << GridLogDebug << "Creating object of type "<< type << std::endl;
|
||||
try
|
||||
{
|
||||
func = builder_.at(type);
|
||||
}
|
||||
catch (std::out_of_range &)
|
||||
{
|
||||
//HADRONS_ERROR("object of type '" + type + "' unknown");
|
||||
std::cout << GridLogError << "Error" << std::endl;
|
||||
std::cout << GridLogError << obj_type() << " object of name [" << type << "] unknown" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return func(input);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // Factory_hpp_
|
@ -1,243 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/FermionOperatorModules.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef FERMIONOPERATOR_MODULES_H
|
||||
#define FERMIONOPERATOR_MODULES_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////
|
||||
// Fermion operators
|
||||
/////////////////////////////////////
|
||||
template < class Product>
|
||||
class FermionOperatorModuleBase : public HMCModuleBase<Product>{
|
||||
public:
|
||||
virtual void AddGridPair(QCD::GridModule&) = 0;
|
||||
};
|
||||
|
||||
template <template <typename> class FOType, class FermionImpl, class FOPar>
|
||||
class FermionOperatorModule
|
||||
: public Parametrized<FOPar>,
|
||||
public FermionOperatorModuleBase<QCD::FermionOperator<FermionImpl> > {
|
||||
|
||||
protected:
|
||||
std::unique_ptr< FOType<FermionImpl> > FOPtr;
|
||||
std::vector< QCD::GridModule* > GridRefs;
|
||||
public:
|
||||
typedef HMCModuleBase< QCD::FermionOperator<FermionImpl> > Base;
|
||||
typedef typename Base::Product Product;
|
||||
|
||||
FermionOperatorModule(FOPar Par) : Parametrized<FOPar>(Par) {}
|
||||
|
||||
template <class ReaderClass>
|
||||
FermionOperatorModule(Reader<ReaderClass>& Reader) : Parametrized<FOPar>(Reader){};
|
||||
|
||||
void AddGridPair(QCD::GridModule &Mod){
|
||||
if (GridRefs.size()>1){
|
||||
std::cout << GridLogError << "Adding too many Grids to the FermionOperatorModule" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
GridRefs.push_back(&Mod);
|
||||
|
||||
if (Ls()){
|
||||
GridRefs.push_back(new QCD::GridModule());
|
||||
GridRefs[1]->set_full(QCD::SpaceTimeGrid::makeFiveDimGrid(Ls(),GridRefs[0]->get_full()));
|
||||
GridRefs[1]->set_rb(QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls(),GridRefs[0]->get_full()));
|
||||
}
|
||||
}
|
||||
|
||||
virtual unsigned int Ls(){
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual void print_parameters(){
|
||||
std::cout << this->Par_ << std::endl;
|
||||
}
|
||||
|
||||
Product* getPtr() {
|
||||
if (!FOPtr) initialize();
|
||||
|
||||
return FOPtr.get();
|
||||
}
|
||||
|
||||
private:
|
||||
virtual void initialize() = 0;
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Factory
|
||||
template <char const *str, class FermionImpl, class ReaderClass >
|
||||
class HMC_FermionOperatorModuleFactory
|
||||
: public Factory < FermionOperatorModuleBase<QCD::FermionOperator<FermionImpl> > , Reader<ReaderClass> > {
|
||||
public:
|
||||
// use SINGLETON FUNCTOR MACRO HERE
|
||||
typedef Reader<ReaderClass> TheReader;
|
||||
|
||||
HMC_FermionOperatorModuleFactory(const HMC_FermionOperatorModuleFactory& e) = delete;
|
||||
void operator=(const HMC_FermionOperatorModuleFactory& e) = delete;
|
||||
static HMC_FermionOperatorModuleFactory& getInstance(void) {
|
||||
static HMC_FermionOperatorModuleFactory e;
|
||||
return e;
|
||||
}
|
||||
|
||||
private:
|
||||
HMC_FermionOperatorModuleFactory(void) = default;
|
||||
std::string obj_type() const {
|
||||
return std::string(str);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
extern char fermionop_string[];
|
||||
namespace QCD{
|
||||
|
||||
// Modules
|
||||
class WilsonFermionParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonFermionParameters,
|
||||
RealD, mass);
|
||||
};
|
||||
|
||||
|
||||
template <class FermionImpl >
|
||||
class WilsonFermionModule: public FermionOperatorModule<WilsonFermion, FermionImpl, WilsonFermionParameters> {
|
||||
typedef FermionOperatorModule<WilsonFermion, FermionImpl, WilsonFermionParameters> FermBase;
|
||||
using FermBase::FermBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
auto GridMod = this->GridRefs[0];
|
||||
typename FermionImpl::GaugeField U(GridMod->get_full());
|
||||
this->FOPtr.reset(new WilsonFermion<FermionImpl>(U, *(GridMod->get_full()), *(GridMod->get_rb()), this->Par_.mass));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
class MobiusFermionParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(MobiusFermionParameters,
|
||||
RealD, mass,
|
||||
RealD, M5,
|
||||
RealD, b,
|
||||
RealD, c,
|
||||
unsigned int, Ls);
|
||||
};
|
||||
|
||||
template <class FermionImpl >
|
||||
class MobiusFermionModule: public FermionOperatorModule<MobiusFermion, FermionImpl, MobiusFermionParameters> {
|
||||
typedef FermionOperatorModule<MobiusFermion, FermionImpl, MobiusFermionParameters> FermBase;
|
||||
using FermBase::FermBase; // for constructors
|
||||
|
||||
virtual unsigned int Ls(){
|
||||
return this->Par_.Ls;
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
auto GridMod = this->GridRefs[0];
|
||||
auto GridMod5d = this->GridRefs[1];
|
||||
typename FermionImpl::GaugeField U(GridMod->get_full());
|
||||
this->FOPtr.reset(new MobiusFermion<FermionImpl>( U, *(GridMod->get_full()), *(GridMod->get_rb()),
|
||||
*(GridMod5d->get_full()), *(GridMod5d->get_rb()),
|
||||
this->Par_.mass, this->Par_.M5, this->Par_.b, this->Par_.c));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class DomainWallFermionParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(DomainWallFermionParameters,
|
||||
RealD, mass,
|
||||
RealD, M5,
|
||||
unsigned int, Ls);
|
||||
};
|
||||
|
||||
template <class FermionImpl >
|
||||
class DomainWallFermionModule: public FermionOperatorModule<DomainWallFermion, FermionImpl, DomainWallFermionParameters> {
|
||||
typedef FermionOperatorModule<DomainWallFermion, FermionImpl, DomainWallFermionParameters> FermBase;
|
||||
using FermBase::FermBase; // for constructors
|
||||
|
||||
virtual unsigned int Ls(){
|
||||
return this->Par_.Ls;
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
auto GridMod = this->GridRefs[0];
|
||||
auto GridMod5d = this->GridRefs[1];
|
||||
typename FermionImpl::GaugeField U(GridMod->get_full());
|
||||
this->FOPtr.reset(new DomainWallFermion<FermionImpl>( U, *(GridMod->get_full()), *(GridMod->get_rb()),
|
||||
*(GridMod5d->get_full()), *(GridMod5d->get_rb()),
|
||||
this->Par_.mass, this->Par_.M5));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class DomainWallEOFAFermionParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(DomainWallEOFAFermionParameters,
|
||||
RealD, mq1,
|
||||
RealD, mq2,
|
||||
RealD, mq3,
|
||||
RealD, shift,
|
||||
int, pm,
|
||||
RealD, M5,
|
||||
unsigned int, Ls);
|
||||
};
|
||||
|
||||
template <class FermionImpl >
|
||||
class DomainWallEOFAFermionModule: public FermionOperatorModule<DomainWallEOFAFermion, FermionImpl, DomainWallEOFAFermionParameters> {
|
||||
typedef FermionOperatorModule<DomainWallEOFAFermion, FermionImpl, DomainWallEOFAFermionParameters> FermBase;
|
||||
using FermBase::FermBase; // for constructors
|
||||
|
||||
virtual unsigned int Ls(){
|
||||
return this->Par_.Ls;
|
||||
}
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
auto GridMod = this->GridRefs[0];
|
||||
auto GridMod5d = this->GridRefs[1];
|
||||
typename FermionImpl::GaugeField U(GridMod->get_full());
|
||||
this->FOPtr.reset(new DomainWallEOFAFermion<FermionImpl>( U, *(GridMod->get_full()), *(GridMod->get_rb()),
|
||||
*(GridMod5d->get_full()), *(GridMod5d->get_rb()),
|
||||
this->Par_.mq1, this->Par_.mq2, this->Par_.mq3,
|
||||
this->Par_.shift, this->Par_.pm, this->Par_.M5));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} // QCD
|
||||
} // Grid
|
||||
|
||||
|
||||
#endif //FERMIONOPERATOR_MODULES_H
|
@ -1,39 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/Modules.cc
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
namespace Grid{
|
||||
|
||||
char gauge_string[] = "gauge";
|
||||
char cp_string[] = "CheckPointer";
|
||||
char hmc_string[] = "HMC";
|
||||
char observable_string[] = "Observable";
|
||||
char solver_string[] = "Solver";
|
||||
char fermionop_string[] = "FermionOperator";
|
||||
|
||||
}
|
@ -1,130 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef HMC_MODULES_H
|
||||
#define HMC_MODULES_H
|
||||
|
||||
/*
|
||||
Define loadable, serializable modules
|
||||
for the HMC execution
|
||||
*/
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Empty class for no parameters
|
||||
class NoParameters{};
|
||||
|
||||
|
||||
/*
|
||||
Base class for modules with parameters
|
||||
*/
|
||||
template < class P >
|
||||
class Parametrized{
|
||||
public:
|
||||
typedef P Parameters;
|
||||
|
||||
Parametrized(Parameters Par):Par_(Par){};
|
||||
|
||||
template <class ReaderClass>
|
||||
Parametrized(Reader<ReaderClass> & R, std::string section_name = "parameters"){
|
||||
read(R, section_name, Par_);
|
||||
}
|
||||
|
||||
void set_parameters(Parameters Par){
|
||||
Par_ = Par;
|
||||
}
|
||||
|
||||
void print_parameters(){
|
||||
std::cout << Par_ << std::endl;
|
||||
}
|
||||
|
||||
protected:
|
||||
Parameters Par_;
|
||||
private:
|
||||
std::string section_name;
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
class Parametrized<NoParameters>{
|
||||
public:
|
||||
typedef NoParameters Parameters;
|
||||
|
||||
Parametrized(Parameters Par){};
|
||||
|
||||
template <class ReaderClass>
|
||||
Parametrized(Reader<ReaderClass> & Reader){};
|
||||
|
||||
void set_parameters(Parameters Par){}
|
||||
|
||||
void print_parameters(){}
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////
|
||||
// Lowest level abstract module class
|
||||
////////////////////////////////////////
|
||||
template <class Prod>
|
||||
class HMCModuleBase {
|
||||
public:
|
||||
typedef Prod Product;
|
||||
|
||||
virtual Prod* getPtr() = 0;
|
||||
|
||||
// add a getReference?
|
||||
|
||||
virtual void print_parameters(){}; // default to nothing
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Registration class
|
||||
/////////////////////////////////////////////
|
||||
|
||||
template <class T, class TheFactory>
|
||||
class Registrar {
|
||||
public:
|
||||
Registrar(std::string className) {
|
||||
// register the class factory function
|
||||
TheFactory::getInstance().registerBuilder(className,
|
||||
[&](typename TheFactory::TheReader Reader)
|
||||
{
|
||||
return std::unique_ptr<T>(new T(Reader));
|
||||
}
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif //HMC_MODULES_H
|
@ -1,158 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/ObservableModules.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef HMC_OBSERVABLE_MODULES_H
|
||||
#define HMC_OBSERVABLE_MODULES_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////
|
||||
// Observables
|
||||
/////////////////////////////
|
||||
template <class ObservableType, class OPar>
|
||||
class ObservableModule
|
||||
: public Parametrized<OPar>,
|
||||
public HMCModuleBase< QCD::HmcObservable<typename ObservableType::Field> > {
|
||||
public:
|
||||
typedef HMCModuleBase< QCD::HmcObservable< typename ObservableType::Field> > Base;
|
||||
typedef typename Base::Product Product;
|
||||
typedef OPar Parameters;
|
||||
|
||||
std::unique_ptr<ObservableType> ObservablePtr;
|
||||
|
||||
ObservableModule(OPar Par) : Parametrized<OPar>(Par) {}
|
||||
|
||||
virtual void print_parameters(){
|
||||
Parametrized<OPar>::print_parameters();
|
||||
}
|
||||
|
||||
template <class ReaderClass>
|
||||
ObservableModule(Reader<ReaderClass>& Reader) : Parametrized<OPar>(Reader){};
|
||||
|
||||
Product* getPtr() {
|
||||
if (!ObservablePtr) initialize();
|
||||
|
||||
return ObservablePtr.get();
|
||||
}
|
||||
|
||||
private:
|
||||
virtual void initialize() = 0;
|
||||
};
|
||||
|
||||
|
||||
|
||||
////////////////
|
||||
// Modules
|
||||
////////////////
|
||||
|
||||
namespace QCD{
|
||||
|
||||
//// Observables module
|
||||
class PlaquetteObsParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(PlaquetteObsParameters,
|
||||
std::string, output_prefix);
|
||||
};
|
||||
|
||||
template < class Impl >
|
||||
class PlaquetteMod: public ObservableModule<PlaquetteLogger<Impl>, NoParameters>{
|
||||
typedef ObservableModule<PlaquetteLogger<Impl>, NoParameters> ObsBase;
|
||||
using ObsBase::ObsBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ObservablePtr.reset(new PlaquetteLogger<Impl>());
|
||||
}
|
||||
public:
|
||||
PlaquetteMod(): ObsBase(NoParameters()){}
|
||||
};
|
||||
|
||||
template < class Impl >
|
||||
class PolyakovMod: public ObservableModule<PolyakovLogger<Impl>, NoParameters>{
|
||||
typedef ObservableModule<PolyakovLogger<Impl>, NoParameters> ObsBase;
|
||||
using ObsBase::ObsBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ObservablePtr.reset(new PolyakovLogger<Impl>());
|
||||
}
|
||||
public:
|
||||
PolyakovMod(): ObsBase(NoParameters()){}
|
||||
};
|
||||
|
||||
|
||||
template < class Impl >
|
||||
class TopologicalChargeMod: public ObservableModule<TopologicalCharge<Impl>, TopologyObsParameters>{
|
||||
typedef ObservableModule<TopologicalCharge<Impl>, TopologyObsParameters> ObsBase;
|
||||
using ObsBase::ObsBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->ObservablePtr.reset(new TopologicalCharge<Impl>(this->Par_));
|
||||
}
|
||||
public:
|
||||
TopologicalChargeMod(TopologyObsParameters Par): ObsBase(Par){}
|
||||
TopologicalChargeMod(): ObsBase(){}
|
||||
};
|
||||
|
||||
|
||||
}// QCD temporarily here
|
||||
|
||||
|
||||
////////////////////////////////////////
|
||||
// Factories specialisations
|
||||
////////////////////////////////////////
|
||||
// explicit ref to LatticeGaugeField must be changed or put in the factory
|
||||
//typedef HMCModuleBase< QCD::HmcObservable<QCD::LatticeGaugeField> > HMC_ObsModBase;
|
||||
|
||||
template <char const *str, class Field, class ReaderClass >
|
||||
class HMC_ObservablesModuleFactory
|
||||
: public Factory < HMCModuleBase< QCD::HmcObservable<Field> >, Reader<ReaderClass> > {
|
||||
public:
|
||||
typedef Reader<ReaderClass> TheReader;
|
||||
// use SINGLETON FUNCTOR MACRO HERE
|
||||
HMC_ObservablesModuleFactory(const HMC_ObservablesModuleFactory& e) = delete;
|
||||
void operator=(const HMC_ObservablesModuleFactory& e) = delete;
|
||||
static HMC_ObservablesModuleFactory& getInstance(void) {
|
||||
static HMC_ObservablesModuleFactory e;
|
||||
return e;
|
||||
}
|
||||
|
||||
private:
|
||||
HMC_ObservablesModuleFactory(void) = default;
|
||||
std::string obj_type() const {
|
||||
return std::string(str);
|
||||
}
|
||||
};
|
||||
|
||||
extern char observable_string[];
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif //HMC_OBSERVABLE_MODULES_H
|
@ -1,129 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/Registration.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef MODULES_REGISTRATION_H
|
||||
#define MODULES_REGISTRATION_H
|
||||
|
||||
// simplify with macros
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Actions
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
typedef QCD::WilsonGModule<ImplementationPolicy> WilsonGMod;
|
||||
typedef QCD::SymanzikGModule<ImplementationPolicy> SymanzikGMod;
|
||||
typedef QCD::IwasakiGModule<ImplementationPolicy> IwasakiGMod;
|
||||
typedef QCD::DBW2GModule<ImplementationPolicy> DBW2GMod;
|
||||
typedef QCD::RBCGModule<ImplementationPolicy> RBCGMod;
|
||||
typedef QCD::PlaqPlusRectangleGModule<ImplementationPolicy> PlaqPlusRectangleGMod;
|
||||
|
||||
static Registrar<QCD::WilsonGMod, HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __WGmodXMLInit("Wilson");
|
||||
static Registrar<QCD::SymanzikGMod, HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __SymGmodXMLInit("Symanzik");
|
||||
static Registrar<QCD::IwasakiGMod, HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __IwGmodXMLInit("Iwasaki");
|
||||
static Registrar<QCD::DBW2GMod, HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __DBW2GmodXMLInit("DBW2");
|
||||
static Registrar<QCD::RBCGMod, HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __RBCGmodXMLInit("RBC");
|
||||
static Registrar<QCD::PlaqPlusRectangleGMod, HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __PPRectGmodXMLInit("PlaqPlusRect");
|
||||
|
||||
|
||||
// FIXME more general implementation
|
||||
static Registrar<QCD::TwoFlavourFModule<FermionImplementationPolicy> ,
|
||||
HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __TwoFlavourFmodXMLInit("TwoFlavours");
|
||||
static Registrar<QCD::TwoFlavourRatioFModule<FermionImplementationPolicy> ,
|
||||
HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __TwoFlavourRatioFmodXMLInit("TwoFlavoursRatio");
|
||||
static Registrar<QCD::TwoFlavourEOFModule<FermionImplementationPolicy> ,
|
||||
HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __TwoFlavourEOFmodXMLInit("TwoFlavoursEvenOdd");
|
||||
static Registrar<QCD::TwoFlavourRatioEOFModule<FermionImplementationPolicy>,
|
||||
HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __TwoFlavourRatioEOFmodXMLInit("TwoFlavoursEvenOddRatio");
|
||||
static Registrar<QCD::OneFlavourFModule<FermionImplementationPolicy> ,
|
||||
HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __OneFlavourFmodXMLInit("OneFlavour");
|
||||
static Registrar<QCD::OneFlavourEOFModule<FermionImplementationPolicy> ,
|
||||
HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __OneFlavourEOFmodXMLInit("OneFlavourEvenOdd");
|
||||
static Registrar<QCD::OneFlavourRatioFModule<FermionImplementationPolicy> ,
|
||||
HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __OneFlavourRatioFmodXMLInit("OneFlavourRatio");
|
||||
static Registrar<QCD::OneFlavourRatioEOFModule<FermionImplementationPolicy>,
|
||||
HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __OneFlavourRatioEOFmodXMLInit("OneFlavourEvenOddRatio");
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Solvers
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Now a specific registration with a fermion field
|
||||
// here must instantiate CG and CR for every new fermion field type (macro!!)
|
||||
|
||||
static Registrar< ConjugateGradientModule<QCD::WilsonFermionR::FermionField>,
|
||||
HMC_SolverModuleFactory<solver_string, QCD::WilsonFermionR::FermionField, Serialiser> > __CGWFmodXMLInit("ConjugateGradient");
|
||||
static Registrar< ConjugateResidualModule<QCD::WilsonFermionR::FermionField>,
|
||||
HMC_SolverModuleFactory<solver_string, QCD::WilsonFermionR::FermionField, Serialiser> > __CRWFmodXMLInit("ConjugateResidual");
|
||||
|
||||
// add the staggered, scalar versions here
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Fermion operators
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static Registrar< QCD::WilsonFermionModule<FermionImplementationPolicy>,
|
||||
HMC_FermionOperatorModuleFactory<fermionop_string, FermionImplementationPolicy, Serialiser> > __WilsonFOPmodXMLInit("Wilson");
|
||||
static Registrar< QCD::MobiusFermionModule<FermionImplementationPolicy>,
|
||||
HMC_FermionOperatorModuleFactory<fermionop_string, FermionImplementationPolicy, Serialiser> > __MobiusFOPmodXMLInit("Mobius");
|
||||
static Registrar< QCD::DomainWallFermionModule<FermionImplementationPolicy>,
|
||||
HMC_FermionOperatorModuleFactory<fermionop_string, FermionImplementationPolicy, Serialiser> > __DWFOPmodXMLInit("DomainWall");
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Observables
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static Registrar<QCD::PlaquetteMod<ImplementationPolicy>, HMC_ObservablesModuleFactory<observable_string, typename ImplementationPolicy::Field, Serialiser> > __OBSPLmodXMLInit("Plaquette");
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Checkpointers
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static Registrar<QCD::BinaryCPModule<ImplementationPolicy>, HMC_CPModuleFactory<cp_string, ImplementationPolicy, Serialiser> > __CPBinarymodXMLInit("Binary");
|
||||
static Registrar<QCD::NerscCPModule<ImplementationPolicy> , HMC_CPModuleFactory<cp_string, ImplementationPolicy, Serialiser> > __CPNerscmodXMLInit("Nersc");
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
static Registrar<QCD::ILDGCPModule<ImplementationPolicy> , HMC_CPModuleFactory<cp_string, ImplementationPolicy, Serialiser> > __CPILDGmodXMLInit("ILDG");
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Integrators
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
static Registrar< HMCLeapFrog<ImplementationPolicy, RepresentationPolicy, Serialiser> , HMCRunnerModuleFactory<hmc_string, Serialiser> > __HMCLFmodXMLInit("LeapFrog");
|
||||
static Registrar< HMCMinimumNorm2<ImplementationPolicy, RepresentationPolicy, Serialiser> , HMCRunnerModuleFactory<hmc_string, Serialiser> > __HMCMN2modXMLInit("MinimumNorm2");
|
||||
static Registrar< HMCForceGradient<ImplementationPolicy, RepresentationPolicy, Serialiser> , HMCRunnerModuleFactory<hmc_string, Serialiser> > __HMCFGmodXMLInit("ForceGradient");
|
||||
|
||||
typedef HMCRunnerModuleFactory<hmc_string, Serialiser > HMCModuleFactory;
|
||||
|
||||
#endif
|
@ -1,138 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/SolverModules.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef SOLVER_MODULES_H
|
||||
#define SOLVER_MODULES_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Operator Functions (Solvers)
|
||||
//////////////////////////////////////////////
|
||||
|
||||
template <template <typename> class SolverType, class Field, class SPar>
|
||||
class SolverModule
|
||||
: public Parametrized<SPar>,
|
||||
public HMCModuleBase<OperatorFunction<Field> > {
|
||||
public:
|
||||
typedef HMCModuleBase< OperatorFunction<Field> > Base;
|
||||
typedef typename Base::Product Product;
|
||||
|
||||
std::unique_ptr< SolverType<Field> > SolverPtr;
|
||||
|
||||
SolverModule(SPar Par) : Parametrized<SPar>(Par) {}
|
||||
|
||||
template <class ReaderClass>
|
||||
SolverModule(Reader<ReaderClass>& Reader) : Parametrized<SPar>(Reader){};
|
||||
|
||||
virtual void print_parameters(){
|
||||
std::cout << this->Par_ << std::endl;
|
||||
}
|
||||
|
||||
Product* getPtr() {
|
||||
if (!SolverPtr) initialize();
|
||||
|
||||
return SolverPtr.get();
|
||||
}
|
||||
|
||||
private:
|
||||
virtual void initialize() = 0;
|
||||
};
|
||||
|
||||
|
||||
// Factory
|
||||
template <char const *str, class Field, class ReaderClass >
|
||||
class HMC_SolverModuleFactory
|
||||
: public Factory < HMCModuleBase<OperatorFunction<Field> > , Reader<ReaderClass> > {
|
||||
public:
|
||||
// use SINGLETON FUNCTOR MACRO HERE
|
||||
typedef Reader<ReaderClass> TheReader;
|
||||
|
||||
HMC_SolverModuleFactory(const HMC_SolverModuleFactory& e) = delete;
|
||||
void operator=(const HMC_SolverModuleFactory& e) = delete;
|
||||
static HMC_SolverModuleFactory& getInstance(void) {
|
||||
static HMC_SolverModuleFactory e;
|
||||
return e;
|
||||
}
|
||||
|
||||
private:
|
||||
HMC_SolverModuleFactory(void) = default;
|
||||
std::string obj_type() const {
|
||||
return std::string(str);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
class SolverParameters : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(SolverParameters,
|
||||
RealD, tolerance,
|
||||
RealD, max_iterations);
|
||||
// add error on no convergence?
|
||||
};
|
||||
|
||||
|
||||
class SolverObjName: Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(SolverObjName,
|
||||
std::string, name,
|
||||
SolverParameters, parameters);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
template <class Field >
|
||||
class ConjugateGradientModule: public SolverModule<ConjugateGradient, Field, SolverParameters> {
|
||||
typedef SolverModule<ConjugateGradient, Field, SolverParameters> SolverBase;
|
||||
using SolverBase::SolverBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->SolverPtr.reset(new ConjugateGradient<Field>(this->Par_.tolerance, this->Par_.max_iterations, true));
|
||||
}
|
||||
};
|
||||
|
||||
template <class Field >
|
||||
class ConjugateResidualModule: public SolverModule<ConjugateResidual, Field, SolverParameters> {
|
||||
typedef SolverModule<ConjugateResidual, Field, SolverParameters> SolverBase;
|
||||
using SolverBase::SolverBase; // for constructors
|
||||
|
||||
// acquire resource
|
||||
virtual void initialize(){
|
||||
this->SolverPtr.reset(new ConjugateResidual<Field>(this->Par_.tolerance, this->Par_.max_iterations));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
extern char solver_string[];
|
||||
} // Grid
|
||||
|
||||
|
||||
#endif //SOLVER_MODULES_H
|
@ -1,46 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef MODS_H
|
||||
#define MODS_H
|
||||
|
||||
// Modules files
|
||||
|
||||
#include <Grid/qcd/modules/Factory.h>
|
||||
#include <Grid/qcd/modules/Modules.h>
|
||||
|
||||
|
||||
#include <Grid/qcd/hmc/checkpointers/CheckPointerModules.h>
|
||||
#include <Grid/qcd/modules/SolverModules.h>
|
||||
#include <Grid/qcd/modules/FermionOperatorModules.h>
|
||||
#include <Grid/qcd/modules/ActionModules.h>
|
||||
#include <Grid/qcd/modules/ObservableModules.h>
|
||||
|
||||
|
||||
|
||||
#endif //MODS_H
|
@ -1,51 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/observables/hmc_observable.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef HMC_OBSERVABLE_H
|
||||
#define HMC_OBSERVABLE_H
|
||||
|
||||
namespace Grid{
|
||||
|
||||
template <class Field>
|
||||
class HmcObservable {
|
||||
public:
|
||||
virtual void TrajectoryComplete(int traj,
|
||||
Field &U,
|
||||
GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) = 0;
|
||||
};
|
||||
|
||||
} // namespace Grid
|
||||
|
||||
#include "plaquette.h"
|
||||
#include "topological_charge.h"
|
||||
#include "polyakov_loop.h"
|
||||
|
||||
|
||||
#endif // HMC_OBSERVABLE_H
|
@ -1,68 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/plaquette.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef HMC_PLAQUETTE_H
|
||||
#define HMC_PLAQUETTE_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// this is only defined for a gauge theory
|
||||
template <class Impl>
|
||||
class PlaquetteLogger : public HmcObservable<typename Impl::Field> {
|
||||
public:
|
||||
// here forces the Impl to be of gauge fields
|
||||
// if not the compiler will complain
|
||||
INHERIT_GIMPL_TYPES(Impl);
|
||||
|
||||
// necessary for HmcObservable compatibility
|
||||
typedef typename Impl::Field Field;
|
||||
|
||||
void TrajectoryComplete(int traj,
|
||||
Field &U,
|
||||
GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
|
||||
RealD plaq = WilsonLoops<Impl>::avgPlaquette(U);
|
||||
|
||||
int def_prec = std::cout.precision();
|
||||
|
||||
std::cout << GridLogMessage
|
||||
<< std::setprecision(std::numeric_limits<Real>::digits10 + 1)
|
||||
<< "Plaquette: [ " << traj << " ] "<< plaq << std::endl;
|
||||
|
||||
std::cout.precision(def_prec);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace QCD
|
||||
} // namespace Grid
|
||||
|
||||
#endif // HMC_PLAQUETTE_H
|
@ -1,68 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/polyakov_line.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: David Preti <david.preti@csic.es>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef HMC_POLYAKOV_H
|
||||
#define HMC_POLYAKOV_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// this is only defined for a gauge theory
|
||||
template <class Impl>
|
||||
class PolyakovLogger : public HmcObservable<typename Impl::Field> {
|
||||
public:
|
||||
// here forces the Impl to be of gauge fields
|
||||
// if not the compiler will complain
|
||||
INHERIT_GIMPL_TYPES(Impl);
|
||||
|
||||
// necessary for HmcObservable compatibility
|
||||
typedef typename Impl::Field Field;
|
||||
|
||||
void TrajectoryComplete(int traj,
|
||||
Field &U,
|
||||
GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
|
||||
ComplexD polyakov = WilsonLoops<Impl>::avgPolyakovLoop(U);
|
||||
|
||||
int def_prec = std::cout.precision();
|
||||
|
||||
std::cout << GridLogMessage
|
||||
<< std::setprecision(std::numeric_limits<Real>::digits10 + 1)
|
||||
<< "Polyakov Loop: [ " << traj << " ] "<< polyakov << std::endl;
|
||||
|
||||
std::cout.precision(def_prec);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace QCD
|
||||
} // namespace Grid
|
||||
|
||||
#endif // HMC_POLYAKOV_H
|
@ -1,122 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/topological_charge.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef HMC_TOP_CHARGE_H
|
||||
#define HMC_TOP_CHARGE_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
struct TopologySmearingParameters : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(TopologySmearingParameters,
|
||||
int, steps,
|
||||
float, step_size,
|
||||
int, meas_interval,
|
||||
float, maxTau);
|
||||
|
||||
TopologySmearingParameters(int s = 0, float ss = 0.0f, int mi = 0, float mT = 0.0f):
|
||||
steps(s), step_size(ss), meas_interval(mi), maxTau(mT){}
|
||||
|
||||
template < class ReaderClass >
|
||||
TopologySmearingParameters(Reader<ReaderClass>& Reader){
|
||||
read(Reader, "Smearing", *this);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct TopologyObsParameters : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(TopologyObsParameters,
|
||||
int, interval,
|
||||
bool, do_smearing,
|
||||
TopologySmearingParameters, Smearing);
|
||||
|
||||
TopologyObsParameters(int interval = 1, bool smearing = false):
|
||||
interval(interval), Smearing(smearing){}
|
||||
|
||||
template <class ReaderClass >
|
||||
TopologyObsParameters(Reader<ReaderClass>& Reader){
|
||||
read(Reader, "TopologyMeasurement", *this);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// this is only defined for a gauge theory
|
||||
template <class Impl>
|
||||
class TopologicalCharge : public HmcObservable<typename Impl::Field> {
|
||||
TopologyObsParameters Pars;
|
||||
|
||||
public:
|
||||
// here forces the Impl to be of gauge fields
|
||||
// if not the compiler will complain
|
||||
INHERIT_GIMPL_TYPES(Impl);
|
||||
|
||||
// necessary for HmcObservable compatibility
|
||||
typedef typename Impl::Field Field;
|
||||
|
||||
TopologicalCharge(int interval = 1, bool do_smearing = false):
|
||||
Pars(interval, do_smearing){}
|
||||
|
||||
TopologicalCharge(TopologyObsParameters P):Pars(P){
|
||||
std::cout << GridLogDebug << "Creating TopologicalCharge " << std::endl;
|
||||
}
|
||||
|
||||
void TrajectoryComplete(int traj,
|
||||
Field &U,
|
||||
GridSerialRNG &sRNG,
|
||||
GridParallelRNG &pRNG) {
|
||||
|
||||
if (traj%Pars.interval == 0){
|
||||
// Smearing
|
||||
Field Usmear = U;
|
||||
int def_prec = std::cout.precision();
|
||||
|
||||
if (Pars.do_smearing){
|
||||
// using wilson flow by default here
|
||||
WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
|
||||
WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
|
||||
Real T0 = WF.energyDensityPlaquette(Usmear);
|
||||
std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
|
||||
<< "T0 : [ " << traj << " ] "<< T0 << std::endl;
|
||||
}
|
||||
|
||||
Real q = WilsonLoops<Impl>::TopologicalCharge(Usmear);
|
||||
std::cout << GridLogMessage
|
||||
<< std::setprecision(std::numeric_limits<Real>::digits10 + 1)
|
||||
<< "Topological Charge: [ " << traj << " ] "<< q << std::endl;
|
||||
|
||||
std::cout.precision(def_prec);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HMC_TOP_CHARGE_H
|
@ -1,44 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/plaquette.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
/*
|
||||
@brief Declares base smearing class Smear
|
||||
*/
|
||||
#ifndef BASE_SMEAR_
|
||||
#define BASE_SMEAR_
|
||||
|
||||
template <class Gimpl>
|
||||
class Smear{
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl) // inherits the types for the gauge fields
|
||||
|
||||
virtual ~Smear(){}
|
||||
virtual void smear (GaugeField&,const GaugeField&)const = 0;
|
||||
virtual void derivative(GaugeField&, const GaugeField&,const GaugeField&) const = 0;
|
||||
};
|
||||
#endif
|
@ -1,213 +0,0 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/modules/plaquette.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef WILSONFLOW_H
|
||||
#define WILSONFLOW_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template <class Gimpl>
|
||||
class WilsonFlow: public Smear<Gimpl>{
|
||||
unsigned int Nstep;
|
||||
unsigned int measure_interval;
|
||||
mutable RealD epsilon, taus;
|
||||
|
||||
|
||||
mutable WilsonGaugeAction<Gimpl> SG;
|
||||
|
||||
void evolve_step(typename Gimpl::GaugeField&) const;
|
||||
void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD);
|
||||
RealD tau(unsigned int t)const {return epsilon*(t+1.0); }
|
||||
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl)
|
||||
|
||||
explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
|
||||
Nstep(Nstep),
|
||||
epsilon(epsilon),
|
||||
measure_interval(interval),
|
||||
SG(WilsonGaugeAction<Gimpl>(3.0)) {
|
||||
// WilsonGaugeAction with beta 3.0
|
||||
assert(epsilon > 0.0);
|
||||
LogMessage();
|
||||
}
|
||||
|
||||
void LogMessage() {
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] Nstep : " << Nstep << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] epsilon : " << epsilon << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl;
|
||||
}
|
||||
|
||||
virtual void smear(GaugeField&, const GaugeField&) const;
|
||||
|
||||
virtual void derivative(GaugeField&, const GaugeField&, const GaugeField&) const {
|
||||
assert(0);
|
||||
// undefined for WilsonFlow
|
||||
}
|
||||
|
||||
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau);
|
||||
RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const;
|
||||
RealD energyDensityPlaquette(const GaugeField& U) const;
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Implementations
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
|
||||
GaugeField Z(U._grid);
|
||||
GaugeField tmp(U._grid);
|
||||
SG.deriv(U, Z);
|
||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
|
||||
|
||||
Z *= -17.0/8.0;
|
||||
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
||||
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
|
||||
|
||||
Z *= -4.0/3.0;
|
||||
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) {
|
||||
if (maxTau - taus < epsilon){
|
||||
epsilon = maxTau-taus;
|
||||
}
|
||||
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
|
||||
GaugeField Z(U._grid);
|
||||
GaugeField Zprime(U._grid);
|
||||
GaugeField tmp(U._grid), Uprime(U._grid);
|
||||
Uprime = U;
|
||||
SG.deriv(U, Z);
|
||||
Zprime = -Z;
|
||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
|
||||
|
||||
Z *= -17.0/8.0;
|
||||
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
||||
Zprime += 2.0*tmp;
|
||||
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
|
||||
|
||||
|
||||
Z *= -4.0/3.0;
|
||||
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
|
||||
|
||||
// Ramos
|
||||
Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
|
||||
// Compute distance as norm^2 of the difference
|
||||
GaugeField diffU = U - Uprime;
|
||||
RealD diff = norm2(diffU);
|
||||
// adjust integration step
|
||||
|
||||
taus += epsilon;
|
||||
//std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
|
||||
|
||||
epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
|
||||
//std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
|
||||
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const {
|
||||
RealD td = tau(step);
|
||||
return 2.0 * td * td * SG.S(U)/U._grid->gSites();
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const {
|
||||
return 2.0 * taus * taus * SG.S(U)/U._grid->gSites();
|
||||
}
|
||||
|
||||
|
||||
//#define WF_TIMING
|
||||
|
||||
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
|
||||
out = in;
|
||||
for (unsigned int step = 1; step <= Nstep; step++) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
evolve_step(out);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> diff = end - start;
|
||||
#ifdef WF_TIMING
|
||||
std::cout << "Time to evolve " << diff.count() << " s\n";
|
||||
#endif
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
|
||||
<< step << " " << tau(step) << " "
|
||||
<< energyDensityPlaquette(step,out) << std::endl;
|
||||
if( step % measure_interval == 0){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
|
||||
<< step << " "
|
||||
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
|
||||
out = in;
|
||||
taus = epsilon;
|
||||
unsigned int step = 0;
|
||||
do{
|
||||
step++;
|
||||
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
|
||||
evolve_step_adaptive(out, maxTau);
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
|
||||
<< step << " " << taus << " "
|
||||
<< energyDensityPlaquette(out) << std::endl;
|
||||
if( step % measure_interval == 0){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
|
||||
<< step << " "
|
||||
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
|
||||
}
|
||||
} while (taus < maxTau);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
} // namespace QCD
|
||||
} // namespace Grid
|
||||
|
||||
#endif // WILSONFLOW_H
|
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user