mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			feature/a2
			...
			feature/ha
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					e57eafe388 | 
							
								
								
									
										23
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										23
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -83,7 +83,6 @@ ltmain.sh
 | 
			
		||||
.Trashes
 | 
			
		||||
ehthumbs.db
 | 
			
		||||
Thumbs.db
 | 
			
		||||
.dirstamp
 | 
			
		||||
 | 
			
		||||
# build directory #
 | 
			
		||||
###################
 | 
			
		||||
@@ -93,24 +92,28 @@ build*/*
 | 
			
		||||
#####################
 | 
			
		||||
*.xcodeproj/*
 | 
			
		||||
build.sh
 | 
			
		||||
.vscode
 | 
			
		||||
*.code-workspace
 | 
			
		||||
 | 
			
		||||
# Eigen source #
 | 
			
		||||
################
 | 
			
		||||
Grid/Eigen
 | 
			
		||||
Eigen/*
 | 
			
		||||
lib/Eigen/*
 | 
			
		||||
 | 
			
		||||
# FFTW source #
 | 
			
		||||
################
 | 
			
		||||
lib/fftw/*
 | 
			
		||||
 | 
			
		||||
# libtool macros #
 | 
			
		||||
##################
 | 
			
		||||
m4/lt*
 | 
			
		||||
m4/libtool.m4
 | 
			
		||||
 | 
			
		||||
# github pages #
 | 
			
		||||
################
 | 
			
		||||
gh-pages/
 | 
			
		||||
# Buck files #
 | 
			
		||||
##############
 | 
			
		||||
.buck*
 | 
			
		||||
buck-out
 | 
			
		||||
BUCK
 | 
			
		||||
make-bin-BUCK.sh
 | 
			
		||||
 | 
			
		||||
# generated sources #
 | 
			
		||||
#####################
 | 
			
		||||
Grid/qcd/spin/gamma-gen/*.h
 | 
			
		||||
Grid/qcd/spin/gamma-gen/*.cc
 | 
			
		||||
lib/qcd/spin/gamma-gen/*.h
 | 
			
		||||
lib/qcd/spin/gamma-gen/*.cc
 | 
			
		||||
							
								
								
									
										94
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										94
									
								
								.travis.yml
									
									
									
									
									
								
							@@ -9,11 +9,62 @@ matrix:
 | 
			
		||||
    - os:        osx
 | 
			
		||||
      osx_image: xcode8.3
 | 
			
		||||
      compiler: clang
 | 
			
		||||
      env: PREC=single
 | 
			
		||||
    - os:        osx
 | 
			
		||||
      osx_image: xcode8.3
 | 
			
		||||
      compiler: clang
 | 
			
		||||
      env: PREC=double
 | 
			
		||||
    - compiler: gcc
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-4.9
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-4.9
 | 
			
		||||
    - compiler: gcc
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-5
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-5
 | 
			
		||||
    - compiler: clang
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-4.8
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
 | 
			
		||||
    - compiler: clang
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-4.8
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
 | 
			
		||||
      
 | 
			
		||||
before_install:
 | 
			
		||||
    - export GRIDDIR=`pwd`
 | 
			
		||||
@@ -21,41 +72,32 @@ before_install:
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
 | 
			
		||||
    
 | 
			
		||||
install:
 | 
			
		||||
    - export CWD=`pwd`
 | 
			
		||||
    - echo $CWD
 | 
			
		||||
    - export CC=$CC$VERSION
 | 
			
		||||
    - export CXX=$CXX$VERSION
 | 
			
		||||
    - echo $PATH
 | 
			
		||||
    - which autoconf
 | 
			
		||||
    - autoconf  --version
 | 
			
		||||
    - which automake
 | 
			
		||||
    - automake  --version
 | 
			
		||||
    - which $CC
 | 
			
		||||
    - $CC  --version
 | 
			
		||||
    - which $CXX
 | 
			
		||||
    - $CXX --version
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
 | 
			
		||||
    
 | 
			
		||||
script:
 | 
			
		||||
    - ./bootstrap.sh
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - mkdir lime
 | 
			
		||||
    - cd lime
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
 | 
			
		||||
    - tar xf lime-1.3.2.tar.gz
 | 
			
		||||
    - cd lime-1.3.2
 | 
			
		||||
    - ./configure --prefix=$CWD/build/lime/install
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - make install
 | 
			
		||||
    - cd $CWD/build
 | 
			
		||||
    - ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
 | 
			
		||||
    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j4 
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
 | 
			
		||||
    - make check
 | 
			
		||||
    - echo make clean
 | 
			
		||||
    - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
 | 
			
		||||
    - echo make clean
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make -j4; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,37 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/DisableWarnings.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef DISABLE_WARNINGS_H
 | 
			
		||||
#define DISABLE_WARNINGS_H
 | 
			
		||||
 | 
			
		||||
 //disables and intel compiler specific warning (in json.hpp)
 | 
			
		||||
#pragma warning disable 488  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,29 +0,0 @@
 | 
			
		||||
#ifndef GRID_STD_H
 | 
			
		||||
#define GRID_STD_H
 | 
			
		||||
 | 
			
		||||
///////////////////
 | 
			
		||||
// Std C++ dependencies
 | 
			
		||||
///////////////////
 | 
			
		||||
#include <cassert>
 | 
			
		||||
#include <complex>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <iomanip>
 | 
			
		||||
#include <random>
 | 
			
		||||
#include <functional>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <signal.h>
 | 
			
		||||
#include <ctime>
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#include <chrono>
 | 
			
		||||
#include <zlib.h>
 | 
			
		||||
 | 
			
		||||
///////////////////
 | 
			
		||||
// Grid config
 | 
			
		||||
///////////////////
 | 
			
		||||
#include "Config.h"
 | 
			
		||||
 | 
			
		||||
#endif /* GRID_STD_H */
 | 
			
		||||
@@ -1,14 +0,0 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
 | 
			
		||||
#ifdef USE_MKL
 | 
			
		||||
#define EIGEN_USE_MKL_ALL
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined __GNUC__
 | 
			
		||||
#pragma GCC diagnostic push
 | 
			
		||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 | 
			
		||||
#endif
 | 
			
		||||
#include <Grid/Eigen/Dense>
 | 
			
		||||
#if defined __GNUC__
 | 
			
		||||
#pragma GCC diagnostic pop
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,63 +0,0 @@
 | 
			
		||||
extra_sources=
 | 
			
		||||
extra_headers=
 | 
			
		||||
 | 
			
		||||
if BUILD_COMMS_MPI3
 | 
			
		||||
  extra_sources+=communicator/Communicator_mpi3.cc
 | 
			
		||||
  extra_sources+=communicator/Communicator_base.cc
 | 
			
		||||
  extra_sources+=communicator/SharedMemoryMPI.cc
 | 
			
		||||
  extra_sources+=communicator/SharedMemory.cc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
if BUILD_COMMS_NONE
 | 
			
		||||
  extra_sources+=communicator/Communicator_none.cc
 | 
			
		||||
  extra_sources+=communicator/Communicator_base.cc
 | 
			
		||||
  extra_sources+=communicator/SharedMemoryNone.cc
 | 
			
		||||
  extra_sources+=communicator/SharedMemory.cc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
if BUILD_HDF5
 | 
			
		||||
  extra_sources+=serialisation/Hdf5IO.cc 
 | 
			
		||||
  extra_headers+=serialisation/Hdf5IO.h
 | 
			
		||||
  extra_headers+=serialisation/Hdf5Type.h
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
all: version-cache
 | 
			
		||||
 | 
			
		||||
version-cache:
 | 
			
		||||
	@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
 | 
			
		||||
		a="uncommited changes";\
 | 
			
		||||
	else\
 | 
			
		||||
		a="clean";\
 | 
			
		||||
	fi;\
 | 
			
		||||
	echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
 | 
			
		||||
	if [ -e version-cache ]; then\
 | 
			
		||||
		d=`diff vertmp version-cache`;\
 | 
			
		||||
		if [ "$${d}" != "" ]; then\
 | 
			
		||||
			mv vertmp version-cache;\
 | 
			
		||||
			rm -f Version.h;\
 | 
			
		||||
		fi;\
 | 
			
		||||
	else\
 | 
			
		||||
		mv vertmp version-cache;\
 | 
			
		||||
		rm -f Version.h;\
 | 
			
		||||
	fi;\
 | 
			
		||||
	rm -f vertmp
 | 
			
		||||
 | 
			
		||||
Version.h:
 | 
			
		||||
	cp version-cache Version.h
 | 
			
		||||
 | 
			
		||||
.PHONY: version-cache
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Libraries
 | 
			
		||||
#
 | 
			
		||||
include Make.inc
 | 
			
		||||
include Eigen.inc
 | 
			
		||||
 | 
			
		||||
lib_LIBRARIES = libGrid.a
 | 
			
		||||
 | 
			
		||||
CCFILES += $(extra_sources)
 | 
			
		||||
HFILES  += $(extra_headers) Config.h Version.h
 | 
			
		||||
 | 
			
		||||
libGrid_a_SOURCES              = $(CCFILES)
 | 
			
		||||
libGrid_adir                   = $(includedir)/Grid
 | 
			
		||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
 | 
			
		||||
@@ -1,152 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/algorithms/approx/Forecast.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef INCLUDED_FORECAST_H
 | 
			
		||||
#define INCLUDED_FORECAST_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  // Abstract base class.
 | 
			
		||||
  // Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
 | 
			
		||||
  // and returns a forecasted solution to the system D*psi = phi (psi).
 | 
			
		||||
  template<class Matrix, class Field>
 | 
			
		||||
  class Forecast
 | 
			
		||||
  {
 | 
			
		||||
    public:
 | 
			
		||||
      virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
 | 
			
		||||
  // used to forecast solutions across poles of the EOFA heatbath.
 | 
			
		||||
  //
 | 
			
		||||
  // Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
 | 
			
		||||
  template<class Matrix, class Field>
 | 
			
		||||
  class ChronoForecast : public Forecast<Matrix,Field>
 | 
			
		||||
  {
 | 
			
		||||
    public:
 | 
			
		||||
      Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
 | 
			
		||||
      {
 | 
			
		||||
        int degree = prev_solns.size();
 | 
			
		||||
        Field chi(phi); // forecasted solution
 | 
			
		||||
 | 
			
		||||
        // Trivial cases
 | 
			
		||||
        if(degree == 0){ chi = zero; return chi; }
 | 
			
		||||
        else if(degree == 1){ return prev_solns[0]; }
 | 
			
		||||
 | 
			
		||||
        RealD dot;
 | 
			
		||||
        ComplexD xp;
 | 
			
		||||
        Field r(phi); // residual
 | 
			
		||||
        Field Mv(phi);
 | 
			
		||||
        std::vector<Field> v(prev_solns); // orthonormalized previous solutions
 | 
			
		||||
        std::vector<Field> MdagMv(degree,phi);
 | 
			
		||||
 | 
			
		||||
        // Array to hold the matrix elements
 | 
			
		||||
        std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
 | 
			
		||||
 | 
			
		||||
        // Solution and source vectors
 | 
			
		||||
        std::vector<ComplexD> a(degree);
 | 
			
		||||
        std::vector<ComplexD> b(degree);
 | 
			
		||||
 | 
			
		||||
        // Orthonormalize the vector basis
 | 
			
		||||
        for(int i=0; i<degree; i++){
 | 
			
		||||
          v[i] *= 1.0/std::sqrt(norm2(v[i]));
 | 
			
		||||
          for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Perform sparse matrix multiplication and construct rhs
 | 
			
		||||
        for(int i=0; i<degree; i++){
 | 
			
		||||
          b[i] = innerProduct(v[i],phi);
 | 
			
		||||
          Mat.M(v[i],Mv);
 | 
			
		||||
          Mat.Mdag(Mv,MdagMv[i]);
 | 
			
		||||
          G[i][i] = innerProduct(v[i],MdagMv[i]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Construct the matrix
 | 
			
		||||
        for(int j=0; j<degree; j++){
 | 
			
		||||
        for(int k=j+1; k<degree; k++){
 | 
			
		||||
          G[j][k] = innerProduct(v[j],MdagMv[k]);
 | 
			
		||||
          G[k][j] = std::conj(G[j][k]);
 | 
			
		||||
        }}
 | 
			
		||||
 | 
			
		||||
        // Gauss-Jordan elimination with partial pivoting
 | 
			
		||||
        for(int i=0; i<degree; i++){
 | 
			
		||||
 | 
			
		||||
          // Perform partial pivoting
 | 
			
		||||
          int k = i;
 | 
			
		||||
          for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } }
 | 
			
		||||
          if(k != i){
 | 
			
		||||
            xp = b[k];
 | 
			
		||||
            b[k] = b[i];
 | 
			
		||||
            b[i] = xp;
 | 
			
		||||
            for(int j=0; j<degree; j++){
 | 
			
		||||
              xp = G[k][j];
 | 
			
		||||
              G[k][j] = G[i][j];
 | 
			
		||||
              G[i][j] = xp;
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          // Convert matrix to upper triangular form
 | 
			
		||||
          for(int j=i+1; j<degree; j++){
 | 
			
		||||
            xp = G[j][i]/G[i][i];
 | 
			
		||||
            b[j] -= xp * b[i];
 | 
			
		||||
            for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Use Gaussian elimination to solve equations and calculate initial guess
 | 
			
		||||
        chi = zero;
 | 
			
		||||
        r = phi;
 | 
			
		||||
        for(int i=degree-1; i>=0; i--){
 | 
			
		||||
          a[i] = 0.0;
 | 
			
		||||
          for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
 | 
			
		||||
          a[i] = (b[i]-a[i])/G[i][i];
 | 
			
		||||
          chi += a[i]*v[i];
 | 
			
		||||
          r -= a[i]*MdagMv[i];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        RealD true_r(0.0);
 | 
			
		||||
        ComplexD tmp;
 | 
			
		||||
        for(int i=0; i<degree; i++){
 | 
			
		||||
          tmp = -b[i];
 | 
			
		||||
          for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
 | 
			
		||||
          tmp = std::conj(tmp)*tmp;
 | 
			
		||||
          true_r += std::sqrt(tmp.real());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        RealD error = std::sqrt(norm2(r)/norm2(phi));
 | 
			
		||||
        std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
 | 
			
		||||
 | 
			
		||||
        return chi;
 | 
			
		||||
      };
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,606 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
 | 
			
		||||
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Block conjugate gradient. Dimension zero should be the block direction
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template <class Field>
 | 
			
		||||
class BlockConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  typedef typename Field::scalar_type scomplex;
 | 
			
		||||
 | 
			
		||||
  int blockDim ;
 | 
			
		||||
  int Nblock;
 | 
			
		||||
 | 
			
		||||
  BlockCGtype CGtype;
 | 
			
		||||
  bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge.
 | 
			
		||||
                           // Defaults true.
 | 
			
		||||
  RealD Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
  
 | 
			
		||||
  BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
 | 
			
		||||
    : Tolerance(tol), CGtype(cgtype),   blockDim(_Orthog),  MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
 | 
			
		||||
  {};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Thin QR factorisation (google it)
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
 | 
			
		||||
		 Eigen::MatrixXcd &C,
 | 
			
		||||
		 Eigen::MatrixXcd &Cinv,
 | 
			
		||||
		 Field & Q,
 | 
			
		||||
		 const Field & R)
 | 
			
		||||
{
 | 
			
		||||
  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //Dimensions
 | 
			
		||||
  // R_{ferm x Nblock} =  Q_{ferm x Nblock} x  C_{Nblock x Nblock} -> ferm x Nblock
 | 
			
		||||
  //
 | 
			
		||||
  // Rdag R = m_rr = Herm = L L^dag        <-- Cholesky decomposition (LLT routine in Eigen)
 | 
			
		||||
  //
 | 
			
		||||
  //   Q  C = R => Q = R C^{-1}
 | 
			
		||||
  //
 | 
			
		||||
  // Want  Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock} 
 | 
			
		||||
  //
 | 
			
		||||
  // Set C = L^{dag}, and then Q^dag Q = ident 
 | 
			
		||||
  //
 | 
			
		||||
  // Checks:
 | 
			
		||||
  // Cdag C = Rdag R ; passes.
 | 
			
		||||
  // QdagQ  = 1      ; passes
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  sliceInnerProductMatrix(m_rr,R,R,Orthog);
 | 
			
		||||
 | 
			
		||||
  // Force manifest hermitian to avoid rounding related
 | 
			
		||||
  m_rr = 0.5*(m_rr+m_rr.adjoint());
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  std::cout << " Calling Cholesky  ldlt on m_rr "  << m_rr <<std::endl;
 | 
			
		||||
  Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL(); 
 | 
			
		||||
  std::cout << " Called Cholesky  ldlt on m_rr "  << L_ldlt <<std::endl;
 | 
			
		||||
  auto  D_ldlt = m_rr.ldlt().vectorD(); 
 | 
			
		||||
  std::cout << " Called Cholesky  ldlt on m_rr "  << D_ldlt <<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  //  std::cout << " Calling Cholesky  llt on m_rr "  <<std::endl;
 | 
			
		||||
  Eigen::MatrixXcd L    = m_rr.llt().matrixL(); 
 | 
			
		||||
  //  std::cout << " Called Cholesky  llt on m_rr "  << L <<std::endl;
 | 
			
		||||
  C    = L.adjoint();
 | 
			
		||||
  Cinv = C.inverse();
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Q = R C^{-1}
 | 
			
		||||
  //
 | 
			
		||||
  // Q_j  = R_i Cinv(i,j) 
 | 
			
		||||
  //
 | 
			
		||||
  // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  sliceMulMatrix(Q,Cinv,R,Orthog);
 | 
			
		||||
}
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Call one of several implementations
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) 
 | 
			
		||||
{
 | 
			
		||||
  if ( CGtype == BlockCGrQ ) {
 | 
			
		||||
    BlockCGrQsolve(Linop,Src,Psi);
 | 
			
		||||
  } else if (CGtype == BlockCG ) {
 | 
			
		||||
    BlockCGsolve(Linop,Src,Psi);
 | 
			
		||||
  } else if (CGtype == CGmultiRHS ) {
 | 
			
		||||
    CGmultiRHSsolve(Linop,Src,Psi);
 | 
			
		||||
  } else {
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// BlockCGrQ implementation:
 | 
			
		||||
//--------------------------
 | 
			
		||||
// X is guess/Solution
 | 
			
		||||
// B is RHS
 | 
			
		||||
// Solve A X_i = B_i    ;        i refers to Nblock index
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X) 
 | 
			
		||||
{
 | 
			
		||||
  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 | 
			
		||||
  Nblock = B._grid->_fdimensions[Orthog];
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
 | 
			
		||||
 | 
			
		||||
  X.checkerboard = B.checkerboard;
 | 
			
		||||
  conformable(X, B);
 | 
			
		||||
 | 
			
		||||
  Field tmp(B);
 | 
			
		||||
  Field Q(B);
 | 
			
		||||
  Field D(B);
 | 
			
		||||
  Field Z(B);
 | 
			
		||||
  Field AD(B);
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  // Initial residual computation & set up
 | 
			
		||||
  std::vector<RealD> residuals(Nblock);
 | 
			
		||||
  std::vector<RealD> ssq(Nblock);
 | 
			
		||||
 | 
			
		||||
  sliceNorm(ssq,B,Orthog);
 | 
			
		||||
  RealD sssum=0;
 | 
			
		||||
  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
 | 
			
		||||
 | 
			
		||||
  sliceNorm(residuals,B,Orthog);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  sliceNorm(residuals,X,Orthog);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  /************************************************************************
 | 
			
		||||
   * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
 | 
			
		||||
   ************************************************************************
 | 
			
		||||
   * Dimensions:
 | 
			
		||||
   *
 | 
			
		||||
   *   X,B==(Nferm x Nblock)
 | 
			
		||||
   *   A==(Nferm x Nferm)
 | 
			
		||||
   *  
 | 
			
		||||
   * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
 | 
			
		||||
   * 
 | 
			
		||||
   * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
 | 
			
		||||
   * for k: 
 | 
			
		||||
   *   Z  = AD
 | 
			
		||||
   *   M  = [D^dag Z]^{-1}
 | 
			
		||||
   *   X  = X + D MC
 | 
			
		||||
   *   QS = Q - ZM
 | 
			
		||||
   *   D  = Q + D S^dag
 | 
			
		||||
   *   C  = S C
 | 
			
		||||
   */
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  // Initial block: initial search dir is guess
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
 | 
			
		||||
 | 
			
		||||
  //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it)
 | 
			
		||||
 | 
			
		||||
  Linop.HermOp(X, AD);
 | 
			
		||||
  tmp = B - AD;  
 | 
			
		||||
  //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
 | 
			
		||||
  ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
 | 
			
		||||
  //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
 | 
			
		||||
  //std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
 | 
			
		||||
  //std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
 | 
			
		||||
  //std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
 | 
			
		||||
  D=Q;
 | 
			
		||||
 | 
			
		||||
  std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  // Timers
 | 
			
		||||
  ///////////////////////////////////////
 | 
			
		||||
  GridStopWatch sliceInnerTimer;
 | 
			
		||||
  GridStopWatch sliceMaddTimer;
 | 
			
		||||
  GridStopWatch QRTimer;
 | 
			
		||||
  GridStopWatch MatrixTimer;
 | 
			
		||||
  GridStopWatch SolverTimer;
 | 
			
		||||
  SolverTimer.Start();
 | 
			
		||||
 | 
			
		||||
  int k;
 | 
			
		||||
  for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
 | 
			
		||||
    //3. Z  = AD
 | 
			
		||||
    MatrixTimer.Start();
 | 
			
		||||
    Linop.HermOp(D, Z);      
 | 
			
		||||
    MatrixTimer.Stop();
 | 
			
		||||
    //std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
 | 
			
		||||
 | 
			
		||||
    //4. M  = [D^dag Z]^{-1}
 | 
			
		||||
    sliceInnerTimer.Start();
 | 
			
		||||
    sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
 | 
			
		||||
    sliceInnerTimer.Stop();
 | 
			
		||||
    m_M       = m_DZ.inverse();
 | 
			
		||||
    //std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    //5. X  = X + D MC
 | 
			
		||||
    m_tmp     = m_M * m_C;
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddMatrix(X,m_tmp, D,X,Orthog);     
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    //6. QS = Q - ZM
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
    QRTimer.Start();
 | 
			
		||||
    ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
 | 
			
		||||
    QRTimer.Stop();
 | 
			
		||||
    
 | 
			
		||||
    //7. D  = Q + D S^dag
 | 
			
		||||
    m_tmp = m_S.adjoint();
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    //8. C  = S C
 | 
			
		||||
    m_C = m_S*m_C;
 | 
			
		||||
    
 | 
			
		||||
    /*********************
 | 
			
		||||
     * convergence monitor
 | 
			
		||||
     *********************
 | 
			
		||||
     */
 | 
			
		||||
    m_rr = m_C.adjoint() * m_C;
 | 
			
		||||
 | 
			
		||||
    RealD max_resid=0;
 | 
			
		||||
    RealD rrsum=0;
 | 
			
		||||
    RealD rr;
 | 
			
		||||
 | 
			
		||||
    for(int b=0;b<Nblock;b++) {
 | 
			
		||||
      rrsum+=real(m_rr(b,b));
 | 
			
		||||
      rr = real(m_rr(b,b))/ssq[b];
 | 
			
		||||
      if ( rr > max_resid ) max_resid = rr;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
 | 
			
		||||
	      <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
 | 
			
		||||
 | 
			
		||||
    if ( max_resid < Tolerance*Tolerance ) { 
 | 
			
		||||
 | 
			
		||||
      SolverTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
 | 
			
		||||
 | 
			
		||||
      for(int b=0;b<Nblock;b++){
 | 
			
		||||
	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
 | 
			
		||||
		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
 | 
			
		||||
 | 
			
		||||
      Linop.HermOp(X, AD);
 | 
			
		||||
      AD = AD-B;
 | 
			
		||||
      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl;
 | 
			
		||||
	    
 | 
			
		||||
      IterationsToComplete = k;
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
 | 
			
		||||
 | 
			
		||||
  if (ErrorOnNoConverge) assert(0);
 | 
			
		||||
  IterationsToComplete = k;
 | 
			
		||||
}
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) 
 | 
			
		||||
{
 | 
			
		||||
  int Orthog = blockDim; // First dimension is block dim; this is an assumption
 | 
			
		||||
  Nblock = Src._grid->_fdimensions[Orthog];
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
 | 
			
		||||
 | 
			
		||||
  Psi.checkerboard = Src.checkerboard;
 | 
			
		||||
  conformable(Psi, Src);
 | 
			
		||||
 | 
			
		||||
  Field P(Src);
 | 
			
		||||
  Field AP(Src);
 | 
			
		||||
  Field R(Src);
 | 
			
		||||
  
 | 
			
		||||
  Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd m_alpha      = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
  Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock);
 | 
			
		||||
 | 
			
		||||
  // Initial residual computation & set up
 | 
			
		||||
  std::vector<RealD> residuals(Nblock);
 | 
			
		||||
  std::vector<RealD> ssq(Nblock);
 | 
			
		||||
 | 
			
		||||
  sliceNorm(ssq,Src,Orthog);
 | 
			
		||||
  RealD sssum=0;
 | 
			
		||||
  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
 | 
			
		||||
 | 
			
		||||
  sliceNorm(residuals,Src,Orthog);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  sliceNorm(residuals,Psi,Orthog);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  // Initial search dir is guess
 | 
			
		||||
  Linop.HermOp(Psi, AP);
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  /************************************************************************
 | 
			
		||||
   * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
 | 
			
		||||
   ************************************************************************
 | 
			
		||||
   * O'Leary : R = B - A X
 | 
			
		||||
   * O'Leary : P = M R ; preconditioner M = 1
 | 
			
		||||
   * O'Leary : alpha = PAP^{-1} RMR
 | 
			
		||||
   * O'Leary : beta  = RMR^{-1}_old RMR_new
 | 
			
		||||
   * O'Leary : X=X+Palpha
 | 
			
		||||
   * O'Leary : R_new=R_old-AP alpha
 | 
			
		||||
   * O'Leary : P=MR_new+P beta
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
  R = Src - AP;  
 | 
			
		||||
  P = R;
 | 
			
		||||
  sliceInnerProductMatrix(m_rr,R,R,Orthog);
 | 
			
		||||
 | 
			
		||||
  GridStopWatch sliceInnerTimer;
 | 
			
		||||
  GridStopWatch sliceMaddTimer;
 | 
			
		||||
  GridStopWatch MatrixTimer;
 | 
			
		||||
  GridStopWatch SolverTimer;
 | 
			
		||||
  SolverTimer.Start();
 | 
			
		||||
 | 
			
		||||
  int k;
 | 
			
		||||
  for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
 | 
			
		||||
    RealD rrsum=0;
 | 
			
		||||
    for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
 | 
			
		||||
	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
 | 
			
		||||
 | 
			
		||||
    MatrixTimer.Start();
 | 
			
		||||
    Linop.HermOp(P, AP);
 | 
			
		||||
    MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    // Alpha
 | 
			
		||||
    sliceInnerTimer.Start();
 | 
			
		||||
    sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
 | 
			
		||||
    sliceInnerTimer.Stop();
 | 
			
		||||
    m_pAp_inv = m_pAp.inverse();
 | 
			
		||||
    m_alpha   = m_pAp_inv * m_rr ;
 | 
			
		||||
 | 
			
		||||
    // Psi, R update
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi
 | 
			
		||||
    sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    // Beta
 | 
			
		||||
    m_rr_inv = m_rr.inverse();
 | 
			
		||||
    sliceInnerTimer.Start();
 | 
			
		||||
    sliceInnerProductMatrix(m_rr,R,R,Orthog);
 | 
			
		||||
    sliceInnerTimer.Stop();
 | 
			
		||||
    m_beta = m_rr_inv *m_rr;
 | 
			
		||||
 | 
			
		||||
    // Search update
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddMatrix(AP,m_beta,P,R,Orthog);
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
    P= AP;
 | 
			
		||||
 | 
			
		||||
    /*********************
 | 
			
		||||
     * convergence monitor
 | 
			
		||||
     *********************
 | 
			
		||||
     */
 | 
			
		||||
    RealD max_resid=0;
 | 
			
		||||
    RealD rr;
 | 
			
		||||
    for(int b=0;b<Nblock;b++){
 | 
			
		||||
      rr = real(m_rr(b,b))/ssq[b];
 | 
			
		||||
      if ( rr > max_resid ) max_resid = rr;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    if ( max_resid < Tolerance*Tolerance ) { 
 | 
			
		||||
 | 
			
		||||
      SolverTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
 | 
			
		||||
      for(int b=0;b<Nblock;b++){
 | 
			
		||||
	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
 | 
			
		||||
		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
 | 
			
		||||
 | 
			
		||||
      Linop.HermOp(Psi, AP);
 | 
			
		||||
      AP = AP-Src;
 | 
			
		||||
      std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
 | 
			
		||||
	    
 | 
			
		||||
      IterationsToComplete = k;
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
 | 
			
		||||
 | 
			
		||||
  if (ErrorOnNoConverge) assert(0);
 | 
			
		||||
  IterationsToComplete = k;
 | 
			
		||||
}
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// multiRHS conjugate gradient. Dimension zero should be the block direction
 | 
			
		||||
// Use this for spread out across nodes
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) 
 | 
			
		||||
{
 | 
			
		||||
  int Orthog = blockDim; // First dimension is block dim
 | 
			
		||||
  Nblock = Src._grid->_fdimensions[Orthog];
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
 | 
			
		||||
 | 
			
		||||
  Psi.checkerboard = Src.checkerboard;
 | 
			
		||||
  conformable(Psi, Src);
 | 
			
		||||
 | 
			
		||||
  Field P(Src);
 | 
			
		||||
  Field AP(Src);
 | 
			
		||||
  Field R(Src);
 | 
			
		||||
  
 | 
			
		||||
  std::vector<ComplexD> v_pAp(Nblock);
 | 
			
		||||
  std::vector<RealD> v_rr (Nblock);
 | 
			
		||||
  std::vector<RealD> v_rr_inv(Nblock);
 | 
			
		||||
  std::vector<RealD> v_alpha(Nblock);
 | 
			
		||||
  std::vector<RealD> v_beta(Nblock);
 | 
			
		||||
 | 
			
		||||
  // Initial residual computation & set up
 | 
			
		||||
  std::vector<RealD> residuals(Nblock);
 | 
			
		||||
  std::vector<RealD> ssq(Nblock);
 | 
			
		||||
 | 
			
		||||
  sliceNorm(ssq,Src,Orthog);
 | 
			
		||||
  RealD sssum=0;
 | 
			
		||||
  for(int b=0;b<Nblock;b++) sssum+=ssq[b];
 | 
			
		||||
 | 
			
		||||
  sliceNorm(residuals,Src,Orthog);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  sliceNorm(residuals,Psi,Orthog);
 | 
			
		||||
  for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
 | 
			
		||||
 | 
			
		||||
  // Initial search dir is guess
 | 
			
		||||
  Linop.HermOp(Psi, AP);
 | 
			
		||||
 | 
			
		||||
  R = Src - AP;  
 | 
			
		||||
  P = R;
 | 
			
		||||
  sliceNorm(v_rr,R,Orthog);
 | 
			
		||||
 | 
			
		||||
  GridStopWatch sliceInnerTimer;
 | 
			
		||||
  GridStopWatch sliceMaddTimer;
 | 
			
		||||
  GridStopWatch sliceNormTimer;
 | 
			
		||||
  GridStopWatch MatrixTimer;
 | 
			
		||||
  GridStopWatch SolverTimer;
 | 
			
		||||
 | 
			
		||||
  SolverTimer.Start();
 | 
			
		||||
  int k;
 | 
			
		||||
  for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
 | 
			
		||||
    RealD rrsum=0;
 | 
			
		||||
    for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
 | 
			
		||||
	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
 | 
			
		||||
 | 
			
		||||
    MatrixTimer.Start();
 | 
			
		||||
    Linop.HermOp(P, AP);
 | 
			
		||||
    MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    // Alpha
 | 
			
		||||
    sliceInnerTimer.Start();
 | 
			
		||||
    sliceInnerProductVector(v_pAp,P,AP,Orthog);
 | 
			
		||||
    sliceInnerTimer.Stop();
 | 
			
		||||
    for(int b=0;b<Nblock;b++){
 | 
			
		||||
      v_alpha[b] = v_rr[b]/real(v_pAp[b]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Psi, R update
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddVector(Psi,v_alpha, P,Psi,Orthog);     // add alpha *  P to psi
 | 
			
		||||
    sliceMaddVector(R  ,v_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    // Beta
 | 
			
		||||
    for(int b=0;b<Nblock;b++){
 | 
			
		||||
      v_rr_inv[b] = 1.0/v_rr[b];
 | 
			
		||||
    }
 | 
			
		||||
    sliceNormTimer.Start();
 | 
			
		||||
    sliceNorm(v_rr,R,Orthog);
 | 
			
		||||
    sliceNormTimer.Stop();
 | 
			
		||||
    for(int b=0;b<Nblock;b++){
 | 
			
		||||
      v_beta[b] = v_rr_inv[b] *v_rr[b];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Search update
 | 
			
		||||
    sliceMaddTimer.Start();
 | 
			
		||||
    sliceMaddVector(P,v_beta,P,R,Orthog);
 | 
			
		||||
    sliceMaddTimer.Stop();
 | 
			
		||||
 | 
			
		||||
    /*********************
 | 
			
		||||
     * convergence monitor
 | 
			
		||||
     *********************
 | 
			
		||||
     */
 | 
			
		||||
    RealD max_resid=0;
 | 
			
		||||
    for(int b=0;b<Nblock;b++){
 | 
			
		||||
      RealD rr = v_rr[b]/ssq[b];
 | 
			
		||||
      if ( rr > max_resid ) max_resid = rr;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    if ( max_resid < Tolerance*Tolerance ) { 
 | 
			
		||||
 | 
			
		||||
      SolverTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
 | 
			
		||||
      for(int b=0;b<Nblock;b++){
 | 
			
		||||
	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
 | 
			
		||||
 | 
			
		||||
      Linop.HermOp(Psi, AP);
 | 
			
		||||
      AP = AP-Src;
 | 
			
		||||
      std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tNorm       " << sliceNormTimer.Elapsed() <<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      IterationsToComplete = k;
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
 | 
			
		||||
 | 
			
		||||
  if (ErrorOnNoConverge) assert(0);
 | 
			
		||||
  IterationsToComplete = k;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,256 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
 | 
			
		||||
#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
 | 
			
		||||
  class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> {
 | 
			
		||||
  public:
 | 
			
		||||
    bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge.
 | 
			
		||||
    // Defaults true.
 | 
			
		||||
    RealD Tolerance;
 | 
			
		||||
    Integer MaxIterations;
 | 
			
		||||
    Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
 | 
			
		||||
    Integer ReliableUpdatesPerformed;
 | 
			
		||||
 | 
			
		||||
    bool DoFinalCleanup; //Final DP cleanup, defaults to true
 | 
			
		||||
    Integer IterationsToCleanup; //Final DP cleanup step iterations
 | 
			
		||||
    
 | 
			
		||||
    LinearOperatorBase<FieldF> &Linop_f;
 | 
			
		||||
    LinearOperatorBase<FieldD> &Linop_d;
 | 
			
		||||
    GridBase* SinglePrecGrid;
 | 
			
		||||
    RealD Delta; //reliable update parameter
 | 
			
		||||
 | 
			
		||||
    //Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
 | 
			
		||||
    LinearOperatorBase<FieldF> *Linop_fallback;
 | 
			
		||||
    RealD fallback_transition_tol;
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true)
 | 
			
		||||
      : Tolerance(tol),
 | 
			
		||||
        MaxIterations(maxit),
 | 
			
		||||
	Delta(_delta),
 | 
			
		||||
	Linop_f(_Linop_f),
 | 
			
		||||
	Linop_d(_Linop_d),
 | 
			
		||||
	SinglePrecGrid(_sp_grid),
 | 
			
		||||
        ErrorOnNoConverge(err_on_no_conv),
 | 
			
		||||
	DoFinalCleanup(true),
 | 
			
		||||
	Linop_fallback(NULL)
 | 
			
		||||
    {};
 | 
			
		||||
 | 
			
		||||
    void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
 | 
			
		||||
      Linop_fallback = &_Linop_fallback;
 | 
			
		||||
      fallback_transition_tol = _fallback_transition_tol;      
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    void operator()(const FieldD &src, FieldD &psi) {
 | 
			
		||||
      LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
 | 
			
		||||
      bool using_fallback = false;
 | 
			
		||||
      
 | 
			
		||||
      psi.checkerboard = src.checkerboard;
 | 
			
		||||
      conformable(psi, src);
 | 
			
		||||
 | 
			
		||||
      RealD cp, c, a, d, b, ssq, qq, b_pred;
 | 
			
		||||
 | 
			
		||||
      FieldD p(src);
 | 
			
		||||
      FieldD mmp(src);
 | 
			
		||||
      FieldD r(src);
 | 
			
		||||
 | 
			
		||||
      // Initial residual computation & set up
 | 
			
		||||
      RealD guess = norm2(psi);
 | 
			
		||||
      assert(std::isnan(guess) == 0);
 | 
			
		||||
    
 | 
			
		||||
      Linop_d.HermOpAndNorm(psi, mmp, d, b);
 | 
			
		||||
    
 | 
			
		||||
      r = src - mmp;
 | 
			
		||||
      p = r;
 | 
			
		||||
 | 
			
		||||
      a = norm2(p);
 | 
			
		||||
      cp = a;
 | 
			
		||||
      ssq = norm2(src);
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl;
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:   src " << ssq << std::endl;
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:    mp " << d << std::endl;
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:   mmp " << b << std::endl;
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:  cp,r " << cp << std::endl;
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate:     p " << a << std::endl;
 | 
			
		||||
 | 
			
		||||
      RealD rsq = Tolerance * Tolerance * ssq;
 | 
			
		||||
 | 
			
		||||
      // Check if guess is really REALLY good :)
 | 
			
		||||
      if (cp <= rsq) {
 | 
			
		||||
	std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
 | 
			
		||||
	std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      //Single prec initialization
 | 
			
		||||
      FieldF r_f(SinglePrecGrid);
 | 
			
		||||
      r_f.checkerboard = r.checkerboard;
 | 
			
		||||
      precisionChange(r_f, r);
 | 
			
		||||
 | 
			
		||||
      FieldF psi_f(r_f);
 | 
			
		||||
      psi_f = zero;
 | 
			
		||||
 | 
			
		||||
      FieldF p_f(r_f);
 | 
			
		||||
      FieldF mmp_f(r_f);
 | 
			
		||||
 | 
			
		||||
      RealD MaxResidSinceLastRelUp = cp; //initial residual    
 | 
			
		||||
    
 | 
			
		||||
      std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
		<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
 | 
			
		||||
 | 
			
		||||
      GridStopWatch LinalgTimer;
 | 
			
		||||
      GridStopWatch MatrixTimer;
 | 
			
		||||
      GridStopWatch SolverTimer;
 | 
			
		||||
 | 
			
		||||
      SolverTimer.Start();
 | 
			
		||||
      int k = 0;
 | 
			
		||||
      int l = 0;
 | 
			
		||||
    
 | 
			
		||||
      for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
	c = cp;
 | 
			
		||||
 | 
			
		||||
	MatrixTimer.Start();
 | 
			
		||||
	Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq);
 | 
			
		||||
	MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
	LinalgTimer.Start();
 | 
			
		||||
 | 
			
		||||
	a = c / d;
 | 
			
		||||
	b_pred = a * (a * qq - d) / c;
 | 
			
		||||
 | 
			
		||||
	cp = axpy_norm(r_f, -a, mmp_f, r_f);
 | 
			
		||||
	b = cp / c;
 | 
			
		||||
 | 
			
		||||
	// Fuse these loops ; should be really easy
 | 
			
		||||
	psi_f = a * p_f + psi_f;
 | 
			
		||||
	//p_f = p_f * b + r_f;
 | 
			
		||||
 | 
			
		||||
	LinalgTimer.Stop();
 | 
			
		||||
 | 
			
		||||
	std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k
 | 
			
		||||
		  << " residual " << cp << " target " << rsq << std::endl;
 | 
			
		||||
	std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << "  b = "<< b << std::endl;
 | 
			
		||||
	std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << "  c = "<< c << std::endl;
 | 
			
		||||
 | 
			
		||||
	if(cp > MaxResidSinceLastRelUp){
 | 
			
		||||
	  std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl;
 | 
			
		||||
	  MaxResidSinceLastRelUp = cp;
 | 
			
		||||
	}
 | 
			
		||||
	  
 | 
			
		||||
	// Stopping condition
 | 
			
		||||
	if (cp <= rsq) {
 | 
			
		||||
	  //Although not written in the paper, I assume that I have to add on the final solution
 | 
			
		||||
	  precisionChange(mmp, psi_f);
 | 
			
		||||
	  psi = psi + mmp;
 | 
			
		||||
	
 | 
			
		||||
	
 | 
			
		||||
	  SolverTimer.Stop();
 | 
			
		||||
	  Linop_d.HermOpAndNorm(psi, mmp, d, qq);
 | 
			
		||||
	  p = mmp - src;
 | 
			
		||||
 | 
			
		||||
	  RealD srcnorm = sqrt(norm2(src));
 | 
			
		||||
	  RealD resnorm = sqrt(norm2(p));
 | 
			
		||||
	  RealD true_residual = resnorm / srcnorm;
 | 
			
		||||
 | 
			
		||||
	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
 | 
			
		||||
	  std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
 | 
			
		||||
	  std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
 | 
			
		||||
	  std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
 | 
			
		||||
 | 
			
		||||
	  std::cout << GridLogMessage << "Time breakdown "<<std::endl;
 | 
			
		||||
	  std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
 | 
			
		||||
	  IterationsToComplete = k;	
 | 
			
		||||
	  ReliableUpdatesPerformed = l;
 | 
			
		||||
	  
 | 
			
		||||
	  if(DoFinalCleanup){
 | 
			
		||||
	    //Do a final CG to cleanup
 | 
			
		||||
	    std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n";
 | 
			
		||||
	    ConjugateGradient<FieldD> CG(Tolerance,MaxIterations);
 | 
			
		||||
	    CG.ErrorOnNoConverge = ErrorOnNoConverge;
 | 
			
		||||
	    CG(Linop_d,src,psi);
 | 
			
		||||
	    IterationsToCleanup = CG.IterationsToComplete;
 | 
			
		||||
	  }
 | 
			
		||||
	  else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
 | 
			
		||||
 | 
			
		||||
	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n";
 | 
			
		||||
	  return;
 | 
			
		||||
	}
 | 
			
		||||
	else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
 | 
			
		||||
	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
 | 
			
		||||
		    << cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
 | 
			
		||||
	  precisionChange(mmp, psi_f);
 | 
			
		||||
	  psi = psi + mmp;
 | 
			
		||||
 | 
			
		||||
	  Linop_d.HermOpAndNorm(psi, mmp, d, qq);
 | 
			
		||||
	  r = src - mmp;
 | 
			
		||||
 | 
			
		||||
	  psi_f = zero;
 | 
			
		||||
	  precisionChange(r_f, r);
 | 
			
		||||
	  cp = norm2(r);
 | 
			
		||||
	  MaxResidSinceLastRelUp = cp;
 | 
			
		||||
 | 
			
		||||
	  b = cp/c;
 | 
			
		||||
	  
 | 
			
		||||
	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl;
 | 
			
		||||
	  
 | 
			
		||||
	  l = l+1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence
 | 
			
		||||
 | 
			
		||||
	if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){
 | 
			
		||||
	  std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl;
 | 
			
		||||
	  Linop_f_use = Linop_fallback;
 | 
			
		||||
	  using_fallback = true;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	
 | 
			
		||||
      }
 | 
			
		||||
      std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge"
 | 
			
		||||
		<< std::endl;
 | 
			
		||||
      
 | 
			
		||||
      if (ErrorOnNoConverge) assert(0);
 | 
			
		||||
      IterationsToComplete = k;
 | 
			
		||||
      ReliableUpdatesPerformed = l;      
 | 
			
		||||
    }    
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,104 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_DEFLATION_H
 | 
			
		||||
#define GRID_DEFLATION_H
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class ZeroGuesser: public LinearFunction<Field> {
 | 
			
		||||
public:
 | 
			
		||||
  virtual void operator()(const Field &src, Field &guess) { guess = zero; };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class SourceGuesser: public LinearFunction<Field> {
 | 
			
		||||
public:
 | 
			
		||||
  virtual void operator()(const Field &src, Field &guess) { guess = src; };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////
 | 
			
		||||
// Fine grid deflation
 | 
			
		||||
////////////////////////////////
 | 
			
		||||
template<class Field>
 | 
			
		||||
class DeflatedGuesser: public LinearFunction<Field> {
 | 
			
		||||
private:
 | 
			
		||||
  const std::vector<Field> &evec;
 | 
			
		||||
  const std::vector<RealD> &eval;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
 | 
			
		||||
 | 
			
		||||
  virtual void operator()(const Field &src,Field &guess) {
 | 
			
		||||
    guess = zero;
 | 
			
		||||
    assert(evec.size()==eval.size());
 | 
			
		||||
    auto N = evec.size();
 | 
			
		||||
    for (int i=0;i<N;i++) {
 | 
			
		||||
      const Field& tmp = evec[i];
 | 
			
		||||
      axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
 | 
			
		||||
    }
 | 
			
		||||
    guess.checkerboard = src.checkerboard;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class FineField, class CoarseField>
 | 
			
		||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
 | 
			
		||||
private:
 | 
			
		||||
  const std::vector<FineField>   &subspace;
 | 
			
		||||
  const std::vector<CoarseField> &evec_coarse;
 | 
			
		||||
  const std::vector<RealD>       &eval_coarse;
 | 
			
		||||
public:
 | 
			
		||||
  
 | 
			
		||||
  LocalCoherenceDeflatedGuesser(const std::vector<FineField>   &_subspace,
 | 
			
		||||
				const std::vector<CoarseField> &_evec_coarse,
 | 
			
		||||
				const std::vector<RealD>       &_eval_coarse)
 | 
			
		||||
    : subspace(_subspace), 
 | 
			
		||||
      evec_coarse(_evec_coarse), 
 | 
			
		||||
      eval_coarse(_eval_coarse)  
 | 
			
		||||
  {
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  void operator()(const FineField &src,FineField &guess) { 
 | 
			
		||||
    int N = (int)evec_coarse.size();
 | 
			
		||||
    CoarseField src_coarse(evec_coarse[0]._grid);
 | 
			
		||||
    CoarseField guess_coarse(evec_coarse[0]._grid);    guess_coarse = zero;
 | 
			
		||||
    blockProject(src_coarse,src,subspace);    
 | 
			
		||||
    for (int i=0;i<N;i++) {
 | 
			
		||||
      const CoarseField & tmp = evec_coarse[i];
 | 
			
		||||
      axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
 | 
			
		||||
    }
 | 
			
		||||
    blockPromote(guess_coarse,guess,subspace);
 | 
			
		||||
    guess.checkerboard = src.checkerboard;
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,842 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
 | 
			
		||||
Author: Christoph Lehner <clehner@bnl.gov>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_BIRL_H
 | 
			
		||||
#define GRID_BIRL_H
 | 
			
		||||
 | 
			
		||||
#include <string.h> //memset
 | 
			
		||||
//#include <zlib.h>
 | 
			
		||||
#include <sys/stat.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
  // Move following 100 LOC to lattice/Lattice_basis.h
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
template<class Field>
 | 
			
		||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) 
 | 
			
		||||
{
 | 
			
		||||
  for(int j=0; j<k; ++j){
 | 
			
		||||
    auto ip = innerProduct(basis[j],w);
 | 
			
		||||
    w = w - ip*basis[j];
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) 
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Field::vector_object vobj;
 | 
			
		||||
  GridBase* grid = basis[0]._grid;
 | 
			
		||||
      
 | 
			
		||||
  parallel_region
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
 | 
			
		||||
       
 | 
			
		||||
    parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
 | 
			
		||||
      for(int j=j0; j<j1; ++j) B[j]=0.;
 | 
			
		||||
      
 | 
			
		||||
      for(int j=j0; j<j1; ++j){
 | 
			
		||||
	for(int k=k0; k<k1; ++k){
 | 
			
		||||
	  B[j] +=Qt(j,k) * basis[k]._odata[ss];
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
      for(int j=j0; j<j1; ++j){
 | 
			
		||||
	  basis[j]._odata[ss] = B[j];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Extract a single rotated vector
 | 
			
		||||
template<class Field>
 | 
			
		||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) 
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Field::vector_object vobj;
 | 
			
		||||
  GridBase* grid = basis[0]._grid;
 | 
			
		||||
 | 
			
		||||
  result.checkerboard = basis[0].checkerboard;
 | 
			
		||||
  parallel_for(int ss=0;ss < grid->oSites();ss++){
 | 
			
		||||
    vobj B = zero;
 | 
			
		||||
    for(int k=k0; k<k1; ++k){
 | 
			
		||||
      B +=Qt(j,k) * basis[k]._odata[ss];
 | 
			
		||||
    }
 | 
			
		||||
    result._odata[ss] = B;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx) 
 | 
			
		||||
{
 | 
			
		||||
  int vlen = idx.size();
 | 
			
		||||
 | 
			
		||||
  assert(vlen>=1);
 | 
			
		||||
  assert(vlen<=sort_vals.size());
 | 
			
		||||
  assert(vlen<=_v.size());
 | 
			
		||||
 | 
			
		||||
  for (size_t i=0;i<vlen;i++) {
 | 
			
		||||
 | 
			
		||||
    if (idx[i] != i) {
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////
 | 
			
		||||
      // idx[i] is a table of desired sources giving a permutation.
 | 
			
		||||
      // Swap v[i] with v[idx[i]].
 | 
			
		||||
      // Find  j>i for which _vnew[j] = _vold[i],
 | 
			
		||||
      // track the move idx[j] => idx[i]
 | 
			
		||||
      // track the move idx[i] => i
 | 
			
		||||
      //////////////////////////////////////
 | 
			
		||||
      size_t j;
 | 
			
		||||
      for (j=i;j<idx.size();j++)
 | 
			
		||||
	if (idx[j]==i)
 | 
			
		||||
	  break;
 | 
			
		||||
 | 
			
		||||
      assert(idx[i] > i);     assert(j!=idx.size());      assert(idx[j]==i);
 | 
			
		||||
 | 
			
		||||
      std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy
 | 
			
		||||
      std::swap(sort_vals[i],sort_vals[idx[i]]);
 | 
			
		||||
 | 
			
		||||
      idx[j] = idx[i];
 | 
			
		||||
      idx[i] = i;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals) 
 | 
			
		||||
{
 | 
			
		||||
  std::vector<int> idx(sort_vals.size());
 | 
			
		||||
  std::iota(idx.begin(), idx.end(), 0);
 | 
			
		||||
 | 
			
		||||
  // sort indexes based on comparing values in v
 | 
			
		||||
  std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
 | 
			
		||||
    return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
 | 
			
		||||
  });
 | 
			
		||||
  return idx;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse) 
 | 
			
		||||
{
 | 
			
		||||
  std::vector<int> idx = basisSortGetIndex(sort_vals);
 | 
			
		||||
  if (reverse)
 | 
			
		||||
    std::reverse(idx.begin(), idx.end());
 | 
			
		||||
  
 | 
			
		||||
  basisReorderInPlace(_v,sort_vals,idx);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
// Implicitly restarted lanczos
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
template<class Field> class ImplicitlyRestartedLanczosTester 
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
  virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
 | 
			
		||||
  virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
enum IRLdiagonalisation { 
 | 
			
		||||
  IRLdiagonaliseWithDSTEGR,
 | 
			
		||||
  IRLdiagonaliseWithQR,
 | 
			
		||||
  IRLdiagonaliseWithEigen
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Field> class ImplicitlyRestartedLanczosHermOpTester  : public ImplicitlyRestartedLanczosTester<Field>
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  LinearFunction<Field>       &_HermOp;
 | 
			
		||||
  ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp)  {  };
 | 
			
		||||
  int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
 | 
			
		||||
  {
 | 
			
		||||
    return TestConvergence(j,resid,B,eval,evalMaxApprox);
 | 
			
		||||
  }
 | 
			
		||||
  int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox)
 | 
			
		||||
  {
 | 
			
		||||
    Field v(B);
 | 
			
		||||
    RealD eval_poly = eval;
 | 
			
		||||
    // Apply operator
 | 
			
		||||
    _HermOp(B,v);
 | 
			
		||||
 | 
			
		||||
    RealD vnum = real(innerProduct(B,v)); // HermOp.
 | 
			
		||||
    RealD vden = norm2(B);
 | 
			
		||||
    RealD vv0  = norm2(v);
 | 
			
		||||
    eval   = vnum/vden;
 | 
			
		||||
    v -= eval*B;
 | 
			
		||||
 | 
			
		||||
    RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
 | 
			
		||||
 | 
			
		||||
    std::cout.precision(13);
 | 
			
		||||
    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
 | 
			
		||||
	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
 | 
			
		||||
	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
 | 
			
		||||
	     <<std::endl;
 | 
			
		||||
 | 
			
		||||
    int conv=0;
 | 
			
		||||
    if( (vv<eresid*eresid) ) conv = 1;
 | 
			
		||||
 | 
			
		||||
    return conv;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Field> 
 | 
			
		||||
class ImplicitlyRestartedLanczos {
 | 
			
		||||
 private:
 | 
			
		||||
  const RealD small = 1.0e-8;
 | 
			
		||||
  int MaxIter;
 | 
			
		||||
  int MinRestart; // Minimum number of restarts; only check for convergence after
 | 
			
		||||
  int Nstop;   // Number of evecs checked for convergence
 | 
			
		||||
  int Nk;      // Number of converged sought
 | 
			
		||||
  //  int Np;      // Np -- Number of spare vecs in krylov space //  == Nm - Nk
 | 
			
		||||
  int Nm;      // Nm -- total number of vectors
 | 
			
		||||
  IRLdiagonalisation diagonalisation;
 | 
			
		||||
  int orth_period;
 | 
			
		||||
    
 | 
			
		||||
  RealD OrthoTime;
 | 
			
		||||
  RealD eresid, betastp;
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Embedded objects
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  LinearFunction<Field>       &_PolyOp;
 | 
			
		||||
  LinearFunction<Field>       &_HermOp;
 | 
			
		||||
  ImplicitlyRestartedLanczosTester<Field> &_Tester;
 | 
			
		||||
  // Default tester provided (we need a ref to something in default case)
 | 
			
		||||
  ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester;
 | 
			
		||||
  /////////////////////////
 | 
			
		||||
  // Constructor
 | 
			
		||||
  /////////////////////////
 | 
			
		||||
  
 | 
			
		||||
public:       
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
  // PAB:
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Too many options  & knobs. 
 | 
			
		||||
  // Eliminate:
 | 
			
		||||
  //   orth_period
 | 
			
		||||
  //   betastp
 | 
			
		||||
  //   MinRestart
 | 
			
		||||
  //
 | 
			
		||||
  // Do we really need orth_period
 | 
			
		||||
  // What is the theoretical basis & guarantees of betastp ?
 | 
			
		||||
  // Nstop=Nk viable?
 | 
			
		||||
  // MinRestart avoidable with new convergence test?
 | 
			
		||||
  // Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation)
 | 
			
		||||
  // HermOp could be eliminated if we dropped the Power method for max eval.
 | 
			
		||||
  // -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
 ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
 | 
			
		||||
			    LinearFunction<Field> & HermOp,
 | 
			
		||||
			    ImplicitlyRestartedLanczosTester<Field> & Tester,
 | 
			
		||||
			    int _Nstop, // sought vecs
 | 
			
		||||
			    int _Nk, // sought vecs
 | 
			
		||||
			    int _Nm, // spare vecs
 | 
			
		||||
			    RealD _eresid, // resid in lmdue deficit 
 | 
			
		||||
			    int _MaxIter, // Max iterations
 | 
			
		||||
			    RealD _betastp=0.0, // if beta(k) < betastp: converged
 | 
			
		||||
			    int _MinRestart=1, int _orth_period = 1,
 | 
			
		||||
			    IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
 | 
			
		||||
    SimpleTester(HermOp), _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(Tester),
 | 
			
		||||
    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
 | 
			
		||||
    eresid(_eresid),      betastp(_betastp),
 | 
			
		||||
    MaxIter(_MaxIter)  ,      MinRestart(_MinRestart),
 | 
			
		||||
    orth_period(_orth_period), diagonalisation(_diagonalisation)  { };
 | 
			
		||||
 | 
			
		||||
    ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
 | 
			
		||||
			       LinearFunction<Field> & HermOp,
 | 
			
		||||
			       int _Nstop, // sought vecs
 | 
			
		||||
			       int _Nk, // sought vecs
 | 
			
		||||
			       int _Nm, // spare vecs
 | 
			
		||||
			       RealD _eresid, // resid in lmdue deficit 
 | 
			
		||||
			       int _MaxIter, // Max iterations
 | 
			
		||||
			       RealD _betastp=0.0, // if beta(k) < betastp: converged
 | 
			
		||||
			       int _MinRestart=1, int _orth_period = 1,
 | 
			
		||||
			       IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
 | 
			
		||||
    SimpleTester(HermOp),  _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(SimpleTester),
 | 
			
		||||
    Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm),
 | 
			
		||||
    eresid(_eresid),      betastp(_betastp),
 | 
			
		||||
    MaxIter(_MaxIter)  ,      MinRestart(_MinRestart),
 | 
			
		||||
    orth_period(_orth_period), diagonalisation(_diagonalisation)  { };
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Helpers
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  template<typename T>  static RealD normalise(T& v) 
 | 
			
		||||
  {
 | 
			
		||||
    RealD nn = norm2(v);
 | 
			
		||||
    nn = sqrt(nn);
 | 
			
		||||
    v = v * (1.0/nn);
 | 
			
		||||
    return nn;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void orthogonalize(Field& w, std::vector<Field>& evec,int k)
 | 
			
		||||
  {
 | 
			
		||||
    OrthoTime-=usecond()/1e6;
 | 
			
		||||
    basisOrthogonalize(evec,w,k);
 | 
			
		||||
    normalise(w);
 | 
			
		||||
    OrthoTime+=usecond()/1e6;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
/* Rudy Arthur's thesis pp.137
 | 
			
		||||
------------------------
 | 
			
		||||
Require: M > K P = M − K †
 | 
			
		||||
Compute the factorization AVM = VM HM + fM eM 
 | 
			
		||||
repeat
 | 
			
		||||
  Q=I
 | 
			
		||||
  for i = 1,...,P do
 | 
			
		||||
    QiRi =HM −θiI Q = QQi
 | 
			
		||||
    H M = Q †i H M Q i
 | 
			
		||||
  end for
 | 
			
		||||
  βK =HM(K+1,K) σK =Q(M,K)
 | 
			
		||||
  r=vK+1βK +rσK
 | 
			
		||||
  VK =VM(1:M)Q(1:M,1:K)
 | 
			
		||||
  HK =HM(1:K,1:K)
 | 
			
		||||
  →AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM
 | 
			
		||||
until convergence
 | 
			
		||||
*/
 | 
			
		||||
  void calc(std::vector<RealD>& eval, std::vector<Field>& evec,  const Field& src, int& Nconv, bool reverse=false)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase *grid = src._grid;
 | 
			
		||||
    assert(grid == evec[0]._grid);
 | 
			
		||||
    
 | 
			
		||||
    GridLogIRL.TimingMode(1);
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 /  "<< MaxIter<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<" -- seek   Nk    = " << Nk    <<" vectors"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<" -- total  Nm    = " << Nm    <<" vectors"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl;
 | 
			
		||||
    std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl;
 | 
			
		||||
    if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
 | 
			
		||||
      std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl;
 | 
			
		||||
    } else if ( diagonalisation == IRLdiagonaliseWithQR ) { 
 | 
			
		||||
      std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl;
 | 
			
		||||
    }  else if ( diagonalisation == IRLdiagonaliseWithEigen ) { 
 | 
			
		||||
      std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
	
 | 
			
		||||
    assert(Nm <= evec.size() && Nm <= eval.size());
 | 
			
		||||
    
 | 
			
		||||
    // quickly get an idea of the largest eigenvalue to more properly normalize the residuum
 | 
			
		||||
    RealD evalMaxApprox = 0.0;
 | 
			
		||||
    {
 | 
			
		||||
      auto src_n = src;
 | 
			
		||||
      auto tmp = src;
 | 
			
		||||
      const int _MAX_ITER_IRL_MEVAPP_ = 50;
 | 
			
		||||
      for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
 | 
			
		||||
	normalise(src_n);
 | 
			
		||||
	_HermOp(src_n,tmp);
 | 
			
		||||
	RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
 | 
			
		||||
	RealD vden = norm2(src_n);
 | 
			
		||||
	RealD na = vnum/vden;
 | 
			
		||||
	if (fabs(evalMaxApprox/na - 1.0) < 0.05)
 | 
			
		||||
	  i=_MAX_ITER_IRL_MEVAPP_;
 | 
			
		||||
	evalMaxApprox = na;
 | 
			
		||||
	std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
 | 
			
		||||
	src_n = tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
	
 | 
			
		||||
    std::vector<RealD> lme(Nm);  
 | 
			
		||||
    std::vector<RealD> lme2(Nm);
 | 
			
		||||
    std::vector<RealD> eval2(Nm);
 | 
			
		||||
    std::vector<RealD> eval2_copy(Nm);
 | 
			
		||||
    Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
 | 
			
		||||
 | 
			
		||||
    Field f(grid);
 | 
			
		||||
    Field v(grid);
 | 
			
		||||
    int k1 = 1;
 | 
			
		||||
    int k2 = Nk;
 | 
			
		||||
    RealD beta_k;
 | 
			
		||||
 | 
			
		||||
    Nconv = 0;
 | 
			
		||||
  
 | 
			
		||||
    // Set initial vector
 | 
			
		||||
    evec[0] = src;
 | 
			
		||||
    normalise(evec[0]);
 | 
			
		||||
	
 | 
			
		||||
    // Initial Nk steps
 | 
			
		||||
    OrthoTime=0.;
 | 
			
		||||
    for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
 | 
			
		||||
    std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl;
 | 
			
		||||
    std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    // Restarting loop begins
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    int iter;
 | 
			
		||||
    for(iter = 0; iter<MaxIter; ++iter){
 | 
			
		||||
      
 | 
			
		||||
      OrthoTime=0.;
 | 
			
		||||
 | 
			
		||||
      std::cout<< GridLogMessage <<" **********************"<< std::endl;
 | 
			
		||||
      std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl;
 | 
			
		||||
      std::cout<< GridLogMessage <<" **********************"<< std::endl;
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl;
 | 
			
		||||
      for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
 | 
			
		||||
      f *= lme[Nm-1];
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl;
 | 
			
		||||
      std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
 | 
			
		||||
	  
 | 
			
		||||
      //////////////////////////////////
 | 
			
		||||
      // getting eigenvalues
 | 
			
		||||
      //////////////////////////////////
 | 
			
		||||
      for(int k=0; k<Nm; ++k){
 | 
			
		||||
	eval2[k] = eval[k+k1-1];
 | 
			
		||||
	lme2[k] = lme[k+k1-1];
 | 
			
		||||
      }
 | 
			
		||||
      Qt = Eigen::MatrixXd::Identity(Nm,Nm);
 | 
			
		||||
      diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
 | 
			
		||||
      std::cout<<GridLogIRL <<" diagonalized "<<std::endl;
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////
 | 
			
		||||
      // sorting
 | 
			
		||||
      //////////////////////////////////
 | 
			
		||||
      eval2_copy = eval2;
 | 
			
		||||
      std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
 | 
			
		||||
      std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
 | 
			
		||||
      const int chunk=8;
 | 
			
		||||
      for(int io=0; io<k2;io+=chunk){
 | 
			
		||||
	std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
 | 
			
		||||
	for(int ii=0;ii<chunk;ii++){
 | 
			
		||||
	  if ( (io+ii)<k2 )
 | 
			
		||||
	    std::cout<< " "<< std::setw(12)<< eval2[io+ii];
 | 
			
		||||
	}
 | 
			
		||||
	std::cout << std::endl;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////
 | 
			
		||||
      // Implicitly shifted QR transformations
 | 
			
		||||
      //////////////////////////////////
 | 
			
		||||
      Qt = Eigen::MatrixXd::Identity(Nm,Nm);
 | 
			
		||||
      for(int ip=k2; ip<Nm; ++ip){ 
 | 
			
		||||
	QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
 | 
			
		||||
      }
 | 
			
		||||
      std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
 | 
			
		||||
 | 
			
		||||
      assert(k2<Nm);      assert(k2<Nm);      assert(k1>0);
 | 
			
		||||
 | 
			
		||||
      basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis
 | 
			
		||||
      std::cout<<GridLogIRL <<"basisRotated  by Qt"<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
      ////////////////////////////////////////////////////
 | 
			
		||||
      // Compressed vector f and beta(k2)
 | 
			
		||||
      ////////////////////////////////////////////////////
 | 
			
		||||
      f *= Qt(k2-1,Nm-1);
 | 
			
		||||
      f += lme[k2-1] * evec[k2];
 | 
			
		||||
      beta_k = norm2(f);
 | 
			
		||||
      beta_k = sqrt(beta_k);
 | 
			
		||||
      std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
 | 
			
		||||
	  
 | 
			
		||||
      RealD betar = 1.0/beta_k;
 | 
			
		||||
      evec[k2] = betar * f;
 | 
			
		||||
      lme[k2-1] = beta_k;
 | 
			
		||||
	  
 | 
			
		||||
      ////////////////////////////////////////////////////
 | 
			
		||||
      // Convergence test
 | 
			
		||||
      ////////////////////////////////////////////////////
 | 
			
		||||
      for(int k=0; k<Nm; ++k){    
 | 
			
		||||
	eval2[k] = eval[k];
 | 
			
		||||
	lme2[k] = lme[k];
 | 
			
		||||
      }
 | 
			
		||||
      Qt = Eigen::MatrixXd::Identity(Nm,Nm);
 | 
			
		||||
      diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
 | 
			
		||||
      std::cout<<GridLogIRL <<" Diagonalized "<<std::endl;
 | 
			
		||||
	  
 | 
			
		||||
      Nconv = 0;
 | 
			
		||||
      if (iter >= MinRestart) {
 | 
			
		||||
 | 
			
		||||
	std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl;
 | 
			
		||||
 | 
			
		||||
	Field B(grid); B.checkerboard = evec[0].checkerboard;
 | 
			
		||||
 | 
			
		||||
	//  power of two search pattern;  not every evalue in eval2 is assessed.
 | 
			
		||||
	int allconv =1;
 | 
			
		||||
	for(int jj = 1; jj<=Nstop; jj*=2){
 | 
			
		||||
	  int j = Nstop-jj;
 | 
			
		||||
	  RealD e = eval2_copy[j]; // Discard the evalue
 | 
			
		||||
	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	    
 | 
			
		||||
	  if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
 | 
			
		||||
	    allconv=0;
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	// Do evec[0] for good measure
 | 
			
		||||
	{ 
 | 
			
		||||
	  int j=0;
 | 
			
		||||
	  RealD e = eval2_copy[0]; 
 | 
			
		||||
	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	    
 | 
			
		||||
	  if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
 | 
			
		||||
	}
 | 
			
		||||
	if ( allconv ) Nconv = Nstop;
 | 
			
		||||
 | 
			
		||||
	// test if we converged, if so, terminate
 | 
			
		||||
	std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
 | 
			
		||||
	//	if( Nconv>=Nstop || beta_k < betastp){
 | 
			
		||||
	if( Nconv>=Nstop){
 | 
			
		||||
	  goto converged;
 | 
			
		||||
	}
 | 
			
		||||
	  
 | 
			
		||||
      } else {
 | 
			
		||||
	std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n";
 | 
			
		||||
      } // end of iter loop
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogError<<"\n NOT converged.\n";
 | 
			
		||||
    abort();
 | 
			
		||||
	
 | 
			
		||||
  converged:
 | 
			
		||||
    {
 | 
			
		||||
      Field B(grid); B.checkerboard = evec[0].checkerboard;
 | 
			
		||||
      basisRotate(evec,Qt,0,Nk,0,Nk,Nm);	    
 | 
			
		||||
      std::cout << GridLogIRL << " Rotated basis"<<std::endl;
 | 
			
		||||
      Nconv=0;
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////
 | 
			
		||||
      // Full final convergence test; unconditionally applied
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////
 | 
			
		||||
      for(int j = 0; j<=Nk; j++){
 | 
			
		||||
	B=evec[j];
 | 
			
		||||
	if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) {
 | 
			
		||||
	  Nconv++;
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      if ( Nconv < Nstop )
 | 
			
		||||
	std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
 | 
			
		||||
 | 
			
		||||
      eval=eval2;
 | 
			
		||||
      
 | 
			
		||||
      //Keep only converged
 | 
			
		||||
      eval.resize(Nconv);// Nstop?
 | 
			
		||||
      evec.resize(Nconv,grid);// Nstop?
 | 
			
		||||
      basisSortInPlace(evec,eval,reverse);
 | 
			
		||||
      
 | 
			
		||||
    }
 | 
			
		||||
       
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
    std::cout << GridLogIRL << " -- Iterations  = "<< iter   << "\n";
 | 
			
		||||
    std::cout << GridLogIRL << " -- beta(k)     = "<< beta_k << "\n";
 | 
			
		||||
    std::cout << GridLogIRL << " -- Nconv       = "<< Nconv  << "\n";
 | 
			
		||||
    std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
/* Saad PP. 195
 | 
			
		||||
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
 | 
			
		||||
2. For k = 1,2,...,m Do:
 | 
			
		||||
3. wk:=Avk−βkv_{k−1}      
 | 
			
		||||
4. αk:=(wk,vk)       // 
 | 
			
		||||
5. wk:=wk−αkvk       // wk orthog vk 
 | 
			
		||||
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
 | 
			
		||||
7. vk+1 := wk/βk+1
 | 
			
		||||
8. EndDo
 | 
			
		||||
 */
 | 
			
		||||
  void step(std::vector<RealD>& lmd,
 | 
			
		||||
	    std::vector<RealD>& lme, 
 | 
			
		||||
	    std::vector<Field>& evec,
 | 
			
		||||
	    Field& w,int Nm,int k)
 | 
			
		||||
  {
 | 
			
		||||
    const RealD tiny = 1.0e-20;
 | 
			
		||||
    assert( k< Nm );
 | 
			
		||||
 | 
			
		||||
    GridStopWatch gsw_op,gsw_o;
 | 
			
		||||
 | 
			
		||||
    Field& evec_k = evec[k];
 | 
			
		||||
 | 
			
		||||
    _PolyOp(evec_k,w);    std::cout<<GridLogIRL << "PolyOp" <<std::endl;
 | 
			
		||||
 | 
			
		||||
    if(k>0) w -= lme[k-1] * evec[k-1];
 | 
			
		||||
 | 
			
		||||
    ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
 | 
			
		||||
    RealD     alph = real(zalph);
 | 
			
		||||
 | 
			
		||||
    w = w - alph * evec_k;// 5. wk:=wk−αkvk
 | 
			
		||||
 | 
			
		||||
    RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
 | 
			
		||||
    // 7. vk+1 := wk/βk+1
 | 
			
		||||
 | 
			
		||||
    lmd[k] = alph;
 | 
			
		||||
    lme[k] = beta;
 | 
			
		||||
 | 
			
		||||
    if (k>0 && k % orth_period == 0) {
 | 
			
		||||
      orthogonalize(w,evec,k); // orthonormalise
 | 
			
		||||
      std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(k < Nm-1) evec[k+1] = w;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
 | 
			
		||||
    if ( beta < tiny ) 
 | 
			
		||||
      std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme, 
 | 
			
		||||
			 int Nk, int Nm,  
 | 
			
		||||
			 Eigen::MatrixXd & Qt, // Nm x Nm
 | 
			
		||||
			 GridBase *grid)
 | 
			
		||||
  {
 | 
			
		||||
    Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk);
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<Nk;i++)   TriDiag(i,i)   = lmd[i];
 | 
			
		||||
    for(int i=0;i<Nk-1;i++) TriDiag(i,i+1) = lme[i];
 | 
			
		||||
    for(int i=0;i<Nk-1;i++) TriDiag(i+1,i) = lme[i];
 | 
			
		||||
    
 | 
			
		||||
    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(TriDiag);
 | 
			
		||||
 | 
			
		||||
    for (int i = 0; i < Nk; i++) {
 | 
			
		||||
      lmd[Nk-1-i] = eigensolver.eigenvalues()(i);
 | 
			
		||||
    }
 | 
			
		||||
    for (int i = 0; i < Nk; i++) {
 | 
			
		||||
      for (int j = 0; j < Nk; j++) {
 | 
			
		||||
	Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // File could end here if settle on Eigen ??? !!!
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  void QR_decomp(std::vector<RealD>& lmd,   // Nm 
 | 
			
		||||
		 std::vector<RealD>& lme,   // Nm 
 | 
			
		||||
		 int Nk, int Nm,            // Nk, Nm
 | 
			
		||||
		 Eigen::MatrixXd& Qt,       // Nm x Nm matrix
 | 
			
		||||
		 RealD Dsh, int kmin, int kmax)
 | 
			
		||||
  {
 | 
			
		||||
    int k = kmin-1;
 | 
			
		||||
    RealD x;
 | 
			
		||||
    
 | 
			
		||||
    RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
 | 
			
		||||
    RealD c = ( lmd[k] -Dsh) *Fden;
 | 
			
		||||
    RealD s = -lme[k] *Fden;
 | 
			
		||||
      
 | 
			
		||||
    RealD tmpa1 = lmd[k];
 | 
			
		||||
    RealD tmpa2 = lmd[k+1];
 | 
			
		||||
    RealD tmpb  = lme[k];
 | 
			
		||||
 | 
			
		||||
    lmd[k]   = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
 | 
			
		||||
    lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
 | 
			
		||||
    lme[k]   = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
 | 
			
		||||
    x        =-s*lme[k+1];
 | 
			
		||||
    lme[k+1] = c*lme[k+1];
 | 
			
		||||
      
 | 
			
		||||
    for(int i=0; i<Nk; ++i){
 | 
			
		||||
      RealD Qtmp1 = Qt(k,i);
 | 
			
		||||
      RealD Qtmp2 = Qt(k+1,i);
 | 
			
		||||
      Qt(k,i)  = c*Qtmp1 - s*Qtmp2;
 | 
			
		||||
      Qt(k+1,i)= s*Qtmp1 + c*Qtmp2; 
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Givens transformations
 | 
			
		||||
    for(int k = kmin; k < kmax-1; ++k){
 | 
			
		||||
      
 | 
			
		||||
      RealD Fden = 1.0/hypot(x,lme[k-1]);
 | 
			
		||||
      RealD c = lme[k-1]*Fden;
 | 
			
		||||
      RealD s = - x*Fden;
 | 
			
		||||
	
 | 
			
		||||
      RealD tmpa1 = lmd[k];
 | 
			
		||||
      RealD tmpa2 = lmd[k+1];
 | 
			
		||||
      RealD tmpb  = lme[k];
 | 
			
		||||
 | 
			
		||||
      lmd[k]   = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
 | 
			
		||||
      lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
 | 
			
		||||
      lme[k]   = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
 | 
			
		||||
      lme[k-1] = c*lme[k-1] -s*x;
 | 
			
		||||
 | 
			
		||||
      if(k != kmax-2){
 | 
			
		||||
	x = -s*lme[k+1];
 | 
			
		||||
	lme[k+1] = c*lme[k+1];
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      for(int i=0; i<Nk; ++i){
 | 
			
		||||
	RealD Qtmp1 = Qt(k,i);
 | 
			
		||||
	RealD Qtmp2 = Qt(k+1,i);
 | 
			
		||||
	Qt(k,i)     = c*Qtmp1 -s*Qtmp2;
 | 
			
		||||
	Qt(k+1,i)   = s*Qtmp1 +c*Qtmp2;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void diagonalize(std::vector<RealD>& lmd, std::vector<RealD>& lme, 
 | 
			
		||||
		   int Nk, int Nm,   
 | 
			
		||||
		   Eigen::MatrixXd & Qt,
 | 
			
		||||
		   GridBase *grid)
 | 
			
		||||
  {
 | 
			
		||||
    Qt = Eigen::MatrixXd::Identity(Nm,Nm);
 | 
			
		||||
    if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
 | 
			
		||||
      diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid);
 | 
			
		||||
    } else if ( diagonalisation == IRLdiagonaliseWithQR ) { 
 | 
			
		||||
      diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid);
 | 
			
		||||
    }  else if ( diagonalisation == IRLdiagonaliseWithEigen ) { 
 | 
			
		||||
      diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid);
 | 
			
		||||
    } else { 
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
 | 
			
		||||
                   double *vl, double *vu, int *il, int *iu, double *abstol,
 | 
			
		||||
                   int *m, double *w, double *z, int *ldz, int *isuppz,
 | 
			
		||||
                   double *work, int *lwork, int *iwork, int *liwork,
 | 
			
		||||
                   int *info);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
void diagonalize_lapack(std::vector<RealD>& lmd,
 | 
			
		||||
			std::vector<RealD>& lme, 
 | 
			
		||||
			int Nk, int Nm,  
 | 
			
		||||
			Eigen::MatrixXd& Qt,
 | 
			
		||||
			GridBase *grid)
 | 
			
		||||
{
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
  const int size = Nm;
 | 
			
		||||
  int NN = Nk;
 | 
			
		||||
  double evals_tmp[NN];
 | 
			
		||||
  double evec_tmp[NN][NN];
 | 
			
		||||
  memset(evec_tmp[0],0,sizeof(double)*NN*NN);
 | 
			
		||||
  double DD[NN];
 | 
			
		||||
  double EE[NN];
 | 
			
		||||
  for (int i = 0; i< NN; i++) {
 | 
			
		||||
    for (int j = i - 1; j <= i + 1; j++) {
 | 
			
		||||
      if ( j < NN && j >= 0 ) {
 | 
			
		||||
	if (i==j) DD[i] = lmd[i];
 | 
			
		||||
	if (i==j) evals_tmp[i] = lmd[i];
 | 
			
		||||
	if (j==(i-1)) EE[j] = lme[j];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  int evals_found;
 | 
			
		||||
  int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
 | 
			
		||||
  int liwork =  3+NN*10 ;
 | 
			
		||||
  int iwork[liwork];
 | 
			
		||||
  double work[lwork];
 | 
			
		||||
  int isuppz[2*NN];
 | 
			
		||||
  char jobz = 'V'; // calculate evals & evecs
 | 
			
		||||
  char range = 'I'; // calculate all evals
 | 
			
		||||
  //    char range = 'A'; // calculate all evals
 | 
			
		||||
  char uplo = 'U'; // refer to upper half of original matrix
 | 
			
		||||
  char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
 | 
			
		||||
  int ifail[NN];
 | 
			
		||||
  int info;
 | 
			
		||||
  int total = grid->_Nprocessors;
 | 
			
		||||
  int node  = grid->_processor;
 | 
			
		||||
  int interval = (NN/total)+1;
 | 
			
		||||
  double vl = 0.0, vu = 0.0;
 | 
			
		||||
  int il = interval*node+1 , iu = interval*(node+1);
 | 
			
		||||
  if (iu > NN)  iu=NN;
 | 
			
		||||
  double tol = 0.0;
 | 
			
		||||
  if (1) {
 | 
			
		||||
    memset(evals_tmp,0,sizeof(double)*NN);
 | 
			
		||||
    if ( il <= NN){
 | 
			
		||||
      LAPACK_dstegr(&jobz, &range, &NN,
 | 
			
		||||
		    (double*)DD, (double*)EE,
 | 
			
		||||
		    &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
 | 
			
		||||
		    &tol, // tolerance
 | 
			
		||||
		    &evals_found, evals_tmp, (double*)evec_tmp, &NN,
 | 
			
		||||
		    isuppz,
 | 
			
		||||
		    work, &lwork, iwork, &liwork,
 | 
			
		||||
		    &info);
 | 
			
		||||
      for (int i = iu-1; i>= il-1; i--){
 | 
			
		||||
	evals_tmp[i] = evals_tmp[i - (il-1)];
 | 
			
		||||
	if (il>1) evals_tmp[i-(il-1)]=0.;
 | 
			
		||||
	for (int j = 0; j< NN; j++){
 | 
			
		||||
	  evec_tmp[i][j] = evec_tmp[i - (il-1)][j];
 | 
			
		||||
	  if (il>1) evec_tmp[i-(il-1)][j]=0.;
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    {
 | 
			
		||||
      grid->GlobalSumVector(evals_tmp,NN);
 | 
			
		||||
      grid->GlobalSumVector((double*)evec_tmp,NN*NN);
 | 
			
		||||
    }
 | 
			
		||||
  } 
 | 
			
		||||
  // Safer to sort instead of just reversing it, 
 | 
			
		||||
  // but the document of the routine says evals are sorted in increasing order. 
 | 
			
		||||
  // qr gives evals in decreasing order.
 | 
			
		||||
  for(int i=0;i<NN;i++){
 | 
			
		||||
    lmd [NN-1-i]=evals_tmp[i];
 | 
			
		||||
    for(int j=0;j<NN;j++){
 | 
			
		||||
      Qt((NN-1-i),j)=evec_tmp[i][j];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
#else 
 | 
			
		||||
  assert(0);
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme, 
 | 
			
		||||
		    int Nk, int Nm,   
 | 
			
		||||
		    Eigen::MatrixXd & Qt,
 | 
			
		||||
		    GridBase *grid)
 | 
			
		||||
{
 | 
			
		||||
  int QRiter = 100*Nm;
 | 
			
		||||
  int kmin = 1;
 | 
			
		||||
  int kmax = Nk;
 | 
			
		||||
  
 | 
			
		||||
  // (this should be more sophisticated)
 | 
			
		||||
  for(int iter=0; iter<QRiter; ++iter){
 | 
			
		||||
    
 | 
			
		||||
    // determination of 2x2 leading submatrix
 | 
			
		||||
    RealD dsub = lmd[kmax-1]-lmd[kmax-2];
 | 
			
		||||
    RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
 | 
			
		||||
    RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
 | 
			
		||||
    // (Dsh: shift)
 | 
			
		||||
    
 | 
			
		||||
    // transformation
 | 
			
		||||
    QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
 | 
			
		||||
    
 | 
			
		||||
    // Convergence criterion (redef of kmin and kamx)
 | 
			
		||||
    for(int j=kmax-1; j>= kmin; --j){
 | 
			
		||||
      RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
 | 
			
		||||
      if(fabs(lme[j-1])+dds > dds){
 | 
			
		||||
	kmax = j+1;
 | 
			
		||||
	goto continued;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    QRiter = iter;
 | 
			
		||||
    return;
 | 
			
		||||
    
 | 
			
		||||
  continued:
 | 
			
		||||
    for(int j=0; j<kmax-1; ++j){
 | 
			
		||||
      RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
 | 
			
		||||
      if(fabs(lme[j])+dds > dds){
 | 
			
		||||
	kmin = j+1;
 | 
			
		||||
	break;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n";
 | 
			
		||||
  abort();
 | 
			
		||||
}
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,406 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Christoph Lehner <clehner@bnl.gov>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LOCAL_COHERENCE_IRL_H
 | 
			
		||||
#define GRID_LOCAL_COHERENCE_IRL_H
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
struct LanczosParams : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
 | 
			
		||||
				  ChebyParams, Cheby,/*Chebyshev*/
 | 
			
		||||
				  int, Nstop,    /*Vecs in Lanczos must converge Nstop < Nk < Nm*/
 | 
			
		||||
				  int, Nk,       /*Vecs in Lanczos seek converge*/
 | 
			
		||||
				  int, Nm,       /*Total vecs in Lanczos include restart*/
 | 
			
		||||
				  RealD, resid,  /*residual*/
 | 
			
		||||
 				  int, MaxIt, 
 | 
			
		||||
				  RealD, betastp,  /* ? */
 | 
			
		||||
				  int, MinRes);    // Must restart
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct LocalCoherenceLanczosParams : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
 | 
			
		||||
				  bool, saveEvecs,
 | 
			
		||||
				  bool, doFine,
 | 
			
		||||
				  bool, doFineRead,
 | 
			
		||||
				  bool, doCoarse,
 | 
			
		||||
	       			  bool, doCoarseRead,
 | 
			
		||||
				  LanczosParams, FineParams,
 | 
			
		||||
				  LanczosParams, CoarseParams,
 | 
			
		||||
				  ChebyParams,   Smoother,
 | 
			
		||||
				  RealD        , coarse_relax_tol,
 | 
			
		||||
				  std::vector<int>, blockSize,
 | 
			
		||||
				  std::string, config,
 | 
			
		||||
				  std::vector < std::complex<double>  >, omega,
 | 
			
		||||
				  RealD, mass,
 | 
			
		||||
				  RealD, M5);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis>
 | 
			
		||||
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
 | 
			
		||||
public:
 | 
			
		||||
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
 | 
			
		||||
  typedef Lattice<CoarseSiteVector>           CoarseField;
 | 
			
		||||
  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
 | 
			
		||||
  typedef Lattice<Fobj>          FineField;
 | 
			
		||||
 | 
			
		||||
  LinearOperatorBase<FineField> &_Linop;
 | 
			
		||||
  std::vector<FineField>        &subspace;
 | 
			
		||||
 | 
			
		||||
  ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) : 
 | 
			
		||||
    _Linop(linop), subspace(_subspace)
 | 
			
		||||
  {  
 | 
			
		||||
    assert(subspace.size() >0);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void operator()(const CoarseField& in, CoarseField& out) {
 | 
			
		||||
    GridBase *FineGrid = subspace[0]._grid;    
 | 
			
		||||
    int   checkerboard = subspace[0].checkerboard;
 | 
			
		||||
      
 | 
			
		||||
    FineField fin (FineGrid);     fin.checkerboard= checkerboard;
 | 
			
		||||
    FineField fout(FineGrid);   fout.checkerboard = checkerboard;
 | 
			
		||||
 | 
			
		||||
    blockPromote(in,fin,subspace);       std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
 | 
			
		||||
    _Linop.HermOp(fin,fout);             std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
 | 
			
		||||
    blockProject(out,fout,subspace);     std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis>
 | 
			
		||||
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
 | 
			
		||||
public:
 | 
			
		||||
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
 | 
			
		||||
  typedef Lattice<CoarseSiteVector>           CoarseField;
 | 
			
		||||
  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
 | 
			
		||||
  typedef Lattice<Fobj>          FineField;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  OperatorFunction<FineField>   & _poly;
 | 
			
		||||
  LinearOperatorBase<FineField> &_Linop;
 | 
			
		||||
  std::vector<FineField>        &subspace;
 | 
			
		||||
 | 
			
		||||
  ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
 | 
			
		||||
			  LinearOperatorBase<FineField>& linop, 
 | 
			
		||||
			  std::vector<FineField> & _subspace) :
 | 
			
		||||
    _poly(poly),
 | 
			
		||||
    _Linop(linop),
 | 
			
		||||
    subspace(_subspace)
 | 
			
		||||
  {  };
 | 
			
		||||
 | 
			
		||||
  void operator()(const CoarseField& in, CoarseField& out) {
 | 
			
		||||
    
 | 
			
		||||
    GridBase *FineGrid = subspace[0]._grid;    
 | 
			
		||||
    int   checkerboard = subspace[0].checkerboard;
 | 
			
		||||
 | 
			
		||||
    FineField fin (FineGrid); fin.checkerboard =checkerboard;
 | 
			
		||||
    FineField fout(FineGrid);fout.checkerboard =checkerboard;
 | 
			
		||||
    
 | 
			
		||||
    blockPromote(in,fin,subspace);             std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
 | 
			
		||||
    _poly(_Linop,fin,fout);                    std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
 | 
			
		||||
    blockProject(out,fout,subspace);           std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis>
 | 
			
		||||
class ImplicitlyRestartedLanczosSmoothedTester  : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > >
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
 | 
			
		||||
  typedef Lattice<CoarseSiteVector>           CoarseField;
 | 
			
		||||
  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
 | 
			
		||||
  typedef Lattice<Fobj>          FineField;
 | 
			
		||||
 | 
			
		||||
  LinearFunction<CoarseField> & _Poly;
 | 
			
		||||
  OperatorFunction<FineField>   & _smoother;
 | 
			
		||||
  LinearOperatorBase<FineField> &_Linop;
 | 
			
		||||
  RealD                          _coarse_relax_tol;
 | 
			
		||||
  std::vector<FineField>        &_subspace;
 | 
			
		||||
  
 | 
			
		||||
  ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly,
 | 
			
		||||
					   OperatorFunction<FineField>   &smoother,
 | 
			
		||||
					   LinearOperatorBase<FineField> &Linop,
 | 
			
		||||
					   std::vector<FineField>        &subspace,
 | 
			
		||||
					   RealD coarse_relax_tol=5.0e3) 
 | 
			
		||||
    : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
 | 
			
		||||
      _coarse_relax_tol(coarse_relax_tol)  
 | 
			
		||||
  {    };
 | 
			
		||||
 | 
			
		||||
  int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
 | 
			
		||||
  {
 | 
			
		||||
    CoarseField v(B);
 | 
			
		||||
    RealD eval_poly = eval;
 | 
			
		||||
 | 
			
		||||
    // Apply operator
 | 
			
		||||
    _Poly(B,v);
 | 
			
		||||
 | 
			
		||||
    RealD vnum = real(innerProduct(B,v)); // HermOp.
 | 
			
		||||
    RealD vden = norm2(B);
 | 
			
		||||
    RealD vv0  = norm2(v);
 | 
			
		||||
    eval   = vnum/vden;
 | 
			
		||||
    v -= eval*B;
 | 
			
		||||
 | 
			
		||||
    RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
 | 
			
		||||
 | 
			
		||||
    std::cout.precision(13);
 | 
			
		||||
    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
 | 
			
		||||
	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
 | 
			
		||||
	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
 | 
			
		||||
	     <<std::endl;
 | 
			
		||||
 | 
			
		||||
    int conv=0;
 | 
			
		||||
    if( (vv<eresid*eresid) ) conv = 1;
 | 
			
		||||
    return conv;
 | 
			
		||||
  }
 | 
			
		||||
  int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase *FineGrid = _subspace[0]._grid;    
 | 
			
		||||
    int checkerboard   = _subspace[0].checkerboard;
 | 
			
		||||
    FineField fB(FineGrid);fB.checkerboard =checkerboard;
 | 
			
		||||
    FineField fv(FineGrid);fv.checkerboard =checkerboard;
 | 
			
		||||
 | 
			
		||||
    blockPromote(B,fv,_subspace);  
 | 
			
		||||
    
 | 
			
		||||
    _smoother(_Linop,fv,fB); 
 | 
			
		||||
 | 
			
		||||
    RealD eval_poly = eval;
 | 
			
		||||
    _Linop.HermOp(fB,fv);
 | 
			
		||||
 | 
			
		||||
    RealD vnum = real(innerProduct(fB,fv)); // HermOp.
 | 
			
		||||
    RealD vden = norm2(fB);
 | 
			
		||||
    RealD vv0  = norm2(fv);
 | 
			
		||||
    eval   = vnum/vden;
 | 
			
		||||
    fv -= eval*fB;
 | 
			
		||||
    RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
 | 
			
		||||
 | 
			
		||||
    std::cout.precision(13);
 | 
			
		||||
    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
 | 
			
		||||
	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
 | 
			
		||||
	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
 | 
			
		||||
	     <<std::endl;
 | 
			
		||||
    if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
 | 
			
		||||
    if( (vv<eresid*eresid) ) return 1;
 | 
			
		||||
    return 0;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Make serializable Lanczos params
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis>
 | 
			
		||||
class LocalCoherenceLanczos 
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
 | 
			
		||||
  typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field
 | 
			
		||||
  typedef Lattice<CoarseSiteVector>           CoarseField;
 | 
			
		||||
  typedef Lattice<Fobj>                       FineField;
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
  GridBase *_CoarseGrid;
 | 
			
		||||
  GridBase *_FineGrid;
 | 
			
		||||
  int _checkerboard;
 | 
			
		||||
  LinearOperatorBase<FineField>                 & _FineOp;
 | 
			
		||||
  
 | 
			
		||||
  std::vector<RealD>                              &evals_fine;
 | 
			
		||||
  std::vector<RealD>                              &evals_coarse; 
 | 
			
		||||
  std::vector<FineField>                          &subspace;
 | 
			
		||||
  std::vector<CoarseField>                        &evec_coarse;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  std::vector<RealD>                              _evals_fine;
 | 
			
		||||
  std::vector<RealD>                              _evals_coarse; 
 | 
			
		||||
  std::vector<FineField>                          _subspace;
 | 
			
		||||
  std::vector<CoarseField>                        _evec_coarse;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  LocalCoherenceLanczos(GridBase *FineGrid,
 | 
			
		||||
			GridBase *CoarseGrid,
 | 
			
		||||
			LinearOperatorBase<FineField> &FineOp,
 | 
			
		||||
			int checkerboard) :
 | 
			
		||||
    _CoarseGrid(CoarseGrid),
 | 
			
		||||
    _FineGrid(FineGrid),
 | 
			
		||||
    _FineOp(FineOp),
 | 
			
		||||
    _checkerboard(checkerboard),
 | 
			
		||||
    evals_fine  (_evals_fine),
 | 
			
		||||
    evals_coarse(_evals_coarse),
 | 
			
		||||
    subspace    (_subspace),
 | 
			
		||||
    evec_coarse(_evec_coarse)
 | 
			
		||||
  {
 | 
			
		||||
    evals_fine.resize(0);
 | 
			
		||||
    evals_coarse.resize(0);
 | 
			
		||||
  };
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Alternate constructore, external storage for use by Hadrons module
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  LocalCoherenceLanczos(GridBase *FineGrid,
 | 
			
		||||
			GridBase *CoarseGrid,
 | 
			
		||||
			LinearOperatorBase<FineField> &FineOp,
 | 
			
		||||
			int checkerboard,
 | 
			
		||||
			std::vector<FineField>   &ext_subspace,
 | 
			
		||||
			std::vector<CoarseField> &ext_coarse,
 | 
			
		||||
			std::vector<RealD>       &ext_eval_fine,
 | 
			
		||||
			std::vector<RealD>       &ext_eval_coarse
 | 
			
		||||
			) :
 | 
			
		||||
    _CoarseGrid(CoarseGrid),
 | 
			
		||||
    _FineGrid(FineGrid),
 | 
			
		||||
    _FineOp(FineOp),
 | 
			
		||||
    _checkerboard(checkerboard),
 | 
			
		||||
    evals_fine  (ext_eval_fine), 
 | 
			
		||||
    evals_coarse(ext_eval_coarse),
 | 
			
		||||
    subspace    (ext_subspace),
 | 
			
		||||
    evec_coarse (ext_coarse)
 | 
			
		||||
  {
 | 
			
		||||
    evals_fine.resize(0);
 | 
			
		||||
    evals_coarse.resize(0);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Orthogonalise(void ) {
 | 
			
		||||
    CoarseScalar InnerProd(_CoarseGrid);
 | 
			
		||||
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
 | 
			
		||||
    blockOrthogonalise(InnerProd,subspace);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<typename T>  static RealD normalise(T& v) 
 | 
			
		||||
  {
 | 
			
		||||
    RealD nn = norm2(v);
 | 
			
		||||
    nn = ::sqrt(nn);
 | 
			
		||||
    v = v * (1.0/nn);
 | 
			
		||||
    return nn;
 | 
			
		||||
  }
 | 
			
		||||
  /*
 | 
			
		||||
  void fakeFine(void)
 | 
			
		||||
  {
 | 
			
		||||
    int Nk = nbasis;
 | 
			
		||||
    subspace.resize(Nk,_FineGrid);
 | 
			
		||||
    subspace[0]=1.0;
 | 
			
		||||
    subspace[0].checkerboard=_checkerboard;
 | 
			
		||||
    normalise(subspace[0]);
 | 
			
		||||
    PlainHermOp<FineField>    Op(_FineOp);
 | 
			
		||||
    for(int k=1;k<Nk;k++){
 | 
			
		||||
      subspace[k].checkerboard=_checkerboard;
 | 
			
		||||
      Op(subspace[k-1],subspace[k]);
 | 
			
		||||
      normalise(subspace[k]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  */
 | 
			
		||||
 | 
			
		||||
  void testFine(RealD resid) 
 | 
			
		||||
  {
 | 
			
		||||
    assert(evals_fine.size() == nbasis);
 | 
			
		||||
    assert(subspace.size() == nbasis);
 | 
			
		||||
    PlainHermOp<FineField>    Op(_FineOp);
 | 
			
		||||
    ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
 | 
			
		||||
    for(int k=0;k<nbasis;k++){
 | 
			
		||||
      assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax) 
 | 
			
		||||
  {
 | 
			
		||||
    assert(evals_fine.size() == nbasis);
 | 
			
		||||
    assert(subspace.size() == nbasis);
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    Chebyshev<FineField>                          ChebySmooth(cheby_smooth);
 | 
			
		||||
    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
 | 
			
		||||
    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
 | 
			
		||||
 | 
			
		||||
    for(int k=0;k<evec_coarse.size();k++){
 | 
			
		||||
      if ( k < nbasis ) { 
 | 
			
		||||
	assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1);
 | 
			
		||||
      } else { 
 | 
			
		||||
	assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid, 
 | 
			
		||||
		RealD MaxIt, RealD betastp, int MinRes)
 | 
			
		||||
  {
 | 
			
		||||
    assert(nbasis<=Nm);
 | 
			
		||||
    Chebyshev<FineField>      Cheby(cheby_parms);
 | 
			
		||||
    FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp);
 | 
			
		||||
    PlainHermOp<FineField>    Op(_FineOp);
 | 
			
		||||
 | 
			
		||||
    evals_fine.resize(Nm);
 | 
			
		||||
    subspace.resize(Nm,_FineGrid);
 | 
			
		||||
 | 
			
		||||
    ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
 | 
			
		||||
 | 
			
		||||
    FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
 | 
			
		||||
 | 
			
		||||
    int Nconv;
 | 
			
		||||
    IRL.calc(evals_fine,subspace,src,Nconv,false);
 | 
			
		||||
    
 | 
			
		||||
    // Shrink down to number saved
 | 
			
		||||
    assert(Nstop>=nbasis);
 | 
			
		||||
    assert(Nconv>=nbasis);
 | 
			
		||||
    evals_fine.resize(nbasis);
 | 
			
		||||
    subspace.resize(nbasis,_FineGrid);
 | 
			
		||||
  }
 | 
			
		||||
  void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
 | 
			
		||||
		  int Nstop, int Nk, int Nm,RealD resid, 
 | 
			
		||||
		  RealD MaxIt, RealD betastp, int MinRes)
 | 
			
		||||
  {
 | 
			
		||||
    Chebyshev<FineField>                          Cheby(cheby_op);
 | 
			
		||||
    ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,subspace);
 | 
			
		||||
    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    Chebyshev<FineField>                                           ChebySmooth(cheby_smooth);
 | 
			
		||||
    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
 | 
			
		||||
 | 
			
		||||
    evals_coarse.resize(Nm);
 | 
			
		||||
    evec_coarse.resize(Nm,_CoarseGrid);
 | 
			
		||||
 | 
			
		||||
    CoarseField src(_CoarseGrid);     src=1.0; 
 | 
			
		||||
 | 
			
		||||
    ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
 | 
			
		||||
    int Nconv=0;
 | 
			
		||||
    IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
 | 
			
		||||
    assert(Nconv>=Nstop);
 | 
			
		||||
    evals_coarse.resize(Nstop);
 | 
			
		||||
    evec_coarse.resize (Nstop,_CoarseGrid);
 | 
			
		||||
    for (int i=0;i<Nstop;i++){
 | 
			
		||||
      std::cout << i << " Coarse eval = " << evals_coarse[i]  << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,186 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class PauliVillarsSolverUnprec
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
  ConjugateGradient<Field> & CG;
 | 
			
		||||
  PauliVillarsSolverUnprec(  ConjugateGradient<Field> &_CG) : CG(_CG){};
 | 
			
		||||
 | 
			
		||||
  template<class Matrix>
 | 
			
		||||
  void operator() (Matrix &_Matrix,const Field &src,Field &sol)
 | 
			
		||||
  {
 | 
			
		||||
    RealD m = _Matrix.Mass();
 | 
			
		||||
    Field A  (_Matrix.FermionGrid());
 | 
			
		||||
 | 
			
		||||
    MdagMLinearOperator<Matrix,Field> HermOp(_Matrix);
 | 
			
		||||
 | 
			
		||||
    _Matrix.SetMass(1.0);
 | 
			
		||||
    _Matrix.Mdag(src,A);
 | 
			
		||||
    CG(HermOp,A,sol);
 | 
			
		||||
    _Matrix.SetMass(m);
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class PauliVillarsSolverRBprec
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
  ConjugateGradient<Field> & CG;
 | 
			
		||||
  PauliVillarsSolverRBprec(  ConjugateGradient<Field> &_CG) : CG(_CG){};
 | 
			
		||||
 | 
			
		||||
  template<class Matrix>
 | 
			
		||||
  void operator() (Matrix &_Matrix,const Field &src,Field &sol)
 | 
			
		||||
  {
 | 
			
		||||
    RealD m = _Matrix.Mass();
 | 
			
		||||
    Field A  (_Matrix.FermionGrid());
 | 
			
		||||
 | 
			
		||||
    _Matrix.SetMass(1.0);
 | 
			
		||||
    SchurRedBlackDiagMooeeSolve<Field> SchurSolver(CG);
 | 
			
		||||
    SchurSolver(_Matrix,src,sol);
 | 
			
		||||
    _Matrix.SetMass(m);
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Field,class PVinverter> class Reconstruct5DfromPhysical {
 | 
			
		||||
 private:
 | 
			
		||||
  PVinverter & PauliVillarsSolver;
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
 /////////////////////////////////////////////////////
 | 
			
		||||
 // First cut works, 10 Oct 2018.
 | 
			
		||||
 //
 | 
			
		||||
 // Must form a plan to get this into production for Zmobius acceleration
 | 
			
		||||
 // of the Mobius exact AMA corrections.
 | 
			
		||||
 //
 | 
			
		||||
 // TODO : understand absence of contact term in eqns in Hantao's thesis
 | 
			
		||||
 //        sol4 is contact term subtracted.
 | 
			
		||||
 //
 | 
			
		||||
 // Options
 | 
			
		||||
 // a) Defect correction approach:
 | 
			
		||||
 //    1) Compute defect from current soln (initially guess).
 | 
			
		||||
 //       This is ...... outerToInner check !!!!
 | 
			
		||||
 //    2) Deflated Zmobius solve to get 4d soln
 | 
			
		||||
 //       Ensure deflation is working
 | 
			
		||||
 //    3) Refine 5d Outer using the inner 4d delta soln
 | 
			
		||||
 // 
 | 
			
		||||
 // Step 1: localise PV inverse in a routine. [DONE]
 | 
			
		||||
 // Step 2: Schur based PV inverse            [DONE]
 | 
			
		||||
 // Step 3: Fourier accelerated PV inverse
 | 
			
		||||
 // Step 4: 
 | 
			
		||||
 /////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
  Reconstruct5DfromPhysical(PVinverter &_PauliVillarsSolver) 
 | 
			
		||||
    : PauliVillarsSolver(_PauliVillarsSolver) 
 | 
			
		||||
  { 
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   template<class Matrix>
 | 
			
		||||
   void PV(Matrix &_Matrix,const Field &src,Field &sol)
 | 
			
		||||
   {
 | 
			
		||||
     RealD m = _Matrix.Mass();
 | 
			
		||||
     _Matrix.SetMass(1.0);
 | 
			
		||||
     _Matrix.M(src,sol);
 | 
			
		||||
     _Matrix.SetMass(m);
 | 
			
		||||
   }
 | 
			
		||||
   template<class Matrix>
 | 
			
		||||
   void PVdag(Matrix &_Matrix,const Field &src,Field &sol)
 | 
			
		||||
   {
 | 
			
		||||
     RealD m = _Matrix.Mass();
 | 
			
		||||
     _Matrix.SetMass(1.0);
 | 
			
		||||
     _Matrix.Mdag(src,sol);
 | 
			
		||||
     _Matrix.SetMass(m);
 | 
			
		||||
   }
 | 
			
		||||
  template<class Matrix>
 | 
			
		||||
  void operator() (Matrix & _Matrix,const Field &sol4,const Field &src4, Field &sol5){
 | 
			
		||||
 | 
			
		||||
    int Ls =  _Matrix.Ls;
 | 
			
		||||
 | 
			
		||||
    Field psi4(_Matrix.GaugeGrid());
 | 
			
		||||
    Field psi(_Matrix.FermionGrid());
 | 
			
		||||
    Field A  (_Matrix.FermionGrid());
 | 
			
		||||
    Field B  (_Matrix.FermionGrid());
 | 
			
		||||
    Field c  (_Matrix.FermionGrid());
 | 
			
		||||
 | 
			
		||||
    typedef typename Matrix::Coeff_t Coeff_t;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage<< " ************************************************" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage<< " Reconstruct5Dprop: c.f. MADWF algorithm         " << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage<< " ************************************************" << std::endl;
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
    //Import source, include Dminus factors
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
    _Matrix.ImportPhysicalFermionSource(src4,B); 
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
    // Set up c from src4
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
    PauliVillarsSolver(_Matrix,B,A);
 | 
			
		||||
    _Matrix.Pdag(A,c);
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////
 | 
			
		||||
    // Build Pdag PV^-1 Dm P [-sol4,c2,c3... cL]
 | 
			
		||||
    //////////////////////////////////////
 | 
			
		||||
    psi4 = - sol4;
 | 
			
		||||
    InsertSlice(psi4, psi, 0   , 0);
 | 
			
		||||
    for (int s=1;s<Ls;s++) {
 | 
			
		||||
      ExtractSlice(psi4,c,s,0);
 | 
			
		||||
       InsertSlice(psi4,psi,s,0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////
 | 
			
		||||
    // Pdag PV^-1 Dm P 
 | 
			
		||||
    /////////////////////////////
 | 
			
		||||
    _Matrix.P(psi,B);
 | 
			
		||||
    _Matrix.M(B,A);
 | 
			
		||||
    PauliVillarsSolver(_Matrix,A,B);
 | 
			
		||||
    _Matrix.Pdag(B,A);
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////
 | 
			
		||||
    // Reinsert surface prop
 | 
			
		||||
    //////////////////////////////
 | 
			
		||||
    InsertSlice(sol4,A,0,0);
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////
 | 
			
		||||
    // Convert from y back to x 
 | 
			
		||||
    //////////////////////////////
 | 
			
		||||
    _Matrix.P(A,sol5);
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
@@ -1,503 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_SCHUR_RED_BLACK_H
 | 
			
		||||
#define GRID_SCHUR_RED_BLACK_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
   * Red black Schur decomposition
 | 
			
		||||
   *
 | 
			
		||||
   *  M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo)
 | 
			
		||||
   *      (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         )
 | 
			
		||||
   *                =         L                     D                     U
 | 
			
		||||
   *
 | 
			
		||||
   * L^-1 = (1              0 )
 | 
			
		||||
   *        (-MoeMee^{-1}   1 )   
 | 
			
		||||
   * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} )
 | 
			
		||||
   *           ( 0       1                    )
 | 
			
		||||
   * L^{-d}  = ( 1      -Mee^{-dag} Moe^{dag} )
 | 
			
		||||
   *           ( 0       1                    )
 | 
			
		||||
   *
 | 
			
		||||
   * U^-1 = (1   -Mee^{-1} Meo)
 | 
			
		||||
   *        (0    1           )
 | 
			
		||||
   * U^{dag} = ( 1                 0)
 | 
			
		||||
   *           (Meo^dag Mee^{-dag} 1)
 | 
			
		||||
   * U^{-dag} = (  1                 0)
 | 
			
		||||
   *            (-Meo^dag Mee^{-dag} 1)
 | 
			
		||||
   ***********************
 | 
			
		||||
   *     M psi = eta
 | 
			
		||||
   ***********************
 | 
			
		||||
   *Odd
 | 
			
		||||
   * i)                 D_oo psi_o =  L^{-1}  eta_o
 | 
			
		||||
   *                        eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
 | 
			
		||||
   *
 | 
			
		||||
   * Wilson:
 | 
			
		||||
   *      (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1}  eta_o
 | 
			
		||||
   * Stag:
 | 
			
		||||
   *      D_oo psi_o = L^{-1}  eta =    (eta_o - Moe Mee^{-1} eta_e)
 | 
			
		||||
   *
 | 
			
		||||
   * L^-1 eta_o= (1              0 ) (e
 | 
			
		||||
   *             (-MoeMee^{-1}   1 )   
 | 
			
		||||
   *
 | 
			
		||||
   *Even
 | 
			
		||||
   * ii)  Mee psi_e + Meo psi_o = src_e
 | 
			
		||||
   *
 | 
			
		||||
   *   => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
   *
 | 
			
		||||
   * 
 | 
			
		||||
   * TODO: Other options:
 | 
			
		||||
   * 
 | 
			
		||||
   * a) change checkerboards for Schur e<->o
 | 
			
		||||
   *
 | 
			
		||||
   * Left precon by Moo^-1
 | 
			
		||||
   * b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 =  (D_oo)^dag M_oo^-dag Moo^-1 L^{-1}  eta_o
 | 
			
		||||
   *                              eta_o'     = (D_oo)^dag  M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
 | 
			
		||||
   *
 | 
			
		||||
   * Right precon by Moo^-1
 | 
			
		||||
   * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o
 | 
			
		||||
   *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
 | 
			
		||||
   *                              psi_o = M_oo^-1 phi_o
 | 
			
		||||
   * TODO: Deflation 
 | 
			
		||||
   */
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Take a matrix and form a Red Black solver calling a Herm solver
 | 
			
		||||
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Now make the norm reflect extra factor of Mee
 | 
			
		||||
  template<class Field> class SchurRedBlackStaggeredSolve {
 | 
			
		||||
  private:
 | 
			
		||||
    OperatorFunction<Field> & _HermitianRBSolver;
 | 
			
		||||
    int CBfactorise;
 | 
			
		||||
    bool subGuess;
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
    // Wrap the usual normal equations Schur trick
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
  SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  :
 | 
			
		||||
     _HermitianRBSolver(HermitianRBSolver) 
 | 
			
		||||
    { 
 | 
			
		||||
      CBfactorise=0;
 | 
			
		||||
      subtractGuess(initSubGuess);
 | 
			
		||||
    };
 | 
			
		||||
    void subtractGuess(const bool initSubGuess)
 | 
			
		||||
    {
 | 
			
		||||
      subGuess = initSubGuess;
 | 
			
		||||
    }
 | 
			
		||||
    bool isSubtractGuess(void)
 | 
			
		||||
    {
 | 
			
		||||
      return subGuess;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Matrix>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out){
 | 
			
		||||
      ZeroGuesser<Field> guess;
 | 
			
		||||
      (*this)(_Matrix,in,out,guess);
 | 
			
		||||
    }
 | 
			
		||||
    template<class Matrix, class Guesser>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){
 | 
			
		||||
 | 
			
		||||
      // FIXME CGdiagonalMee not implemented virtual function
 | 
			
		||||
      // FIXME use CBfactorise to control schur decomp
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
 
 | 
			
		||||
      Field src_e(grid);
 | 
			
		||||
      Field src_o(grid);
 | 
			
		||||
      Field sol_e(grid);
 | 
			
		||||
      Field sol_o(grid);
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field  Mtmp(grid);
 | 
			
		||||
      Field resid(fgrid);
 | 
			
		||||
      
 | 
			
		||||
      std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
 | 
			
		||||
      pickCheckerboard(Even,src_e,in);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,in);
 | 
			
		||||
      pickCheckerboard(Even,sol_e,out);
 | 
			
		||||
      pickCheckerboard(Odd ,sol_o,out);
 | 
			
		||||
      std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
 | 
			
		||||
      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
 | 
			
		||||
      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
 | 
			
		||||
 | 
			
		||||
      //src_o = tmp;     assert(src_o.checkerboard ==Odd);
 | 
			
		||||
      _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      // Call the red-black solver
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
 | 
			
		||||
      guess(src_o, sol_o);
 | 
			
		||||
      Mtmp = sol_o;
 | 
			
		||||
      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
 | 
			
		||||
      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called  the Mpc solver" <<std::endl;
 | 
			
		||||
      // Fionn A2A boolean behavioural control
 | 
			
		||||
      if (subGuess)        sol_o = sol_o-Mtmp;
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even);
 | 
			
		||||
      src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even);
 | 
			
		||||
      _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
     
 | 
			
		||||
      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;
 | 
			
		||||
      setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
      setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd );
 | 
			
		||||
      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
 | 
			
		||||
 | 
			
		||||
      // Verify the unprec residual
 | 
			
		||||
      if ( ! subGuess ) {
 | 
			
		||||
        _Matrix.M(out,resid); 
 | 
			
		||||
        resid = resid-in;
 | 
			
		||||
        RealD ns = norm2(in);
 | 
			
		||||
        RealD nr = norm2(resid);
 | 
			
		||||
        std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
 | 
			
		||||
      } else {
 | 
			
		||||
        std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }     
 | 
			
		||||
  };
 | 
			
		||||
  template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Take a matrix and form a Red Black solver calling a Herm solver
 | 
			
		||||
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class Field> class SchurRedBlackDiagMooeeSolve {
 | 
			
		||||
  private:
 | 
			
		||||
    OperatorFunction<Field> & _HermitianRBSolver;
 | 
			
		||||
    int CBfactorise;
 | 
			
		||||
    bool subGuess;
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
    // Wrap the usual normal equations Schur trick
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
  SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0, const bool initSubGuess = false)  :  _HermitianRBSolver(HermitianRBSolver) 
 | 
			
		||||
  { 
 | 
			
		||||
    CBfactorise=cb;
 | 
			
		||||
    subtractGuess(initSubGuess);
 | 
			
		||||
  };
 | 
			
		||||
    void subtractGuess(const bool initSubGuess)
 | 
			
		||||
    {
 | 
			
		||||
      subGuess = initSubGuess;
 | 
			
		||||
    }
 | 
			
		||||
    bool isSubtractGuess(void)
 | 
			
		||||
    {
 | 
			
		||||
      return subGuess;
 | 
			
		||||
    }
 | 
			
		||||
    template<class Matrix>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out){
 | 
			
		||||
      ZeroGuesser<Field> guess;
 | 
			
		||||
      (*this)(_Matrix,in,out,guess);
 | 
			
		||||
    }
 | 
			
		||||
    template<class Matrix, class Guesser>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
 | 
			
		||||
 | 
			
		||||
      // FIXME CGdiagonalMee not implemented virtual function
 | 
			
		||||
      // FIXME use CBfactorise to control schur decomp
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
 
 | 
			
		||||
      Field src_e(grid);
 | 
			
		||||
      Field src_o(grid);
 | 
			
		||||
      Field sol_e(grid);
 | 
			
		||||
      Field sol_o(grid);
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field  Mtmp(grid);
 | 
			
		||||
      Field resid(fgrid);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,src_e,in);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,in);
 | 
			
		||||
      pickCheckerboard(Even,sol_e,out);
 | 
			
		||||
      pickCheckerboard(Odd ,sol_o,out);
 | 
			
		||||
    
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
 | 
			
		||||
      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
 | 
			
		||||
      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
 | 
			
		||||
 | 
			
		||||
      // get the right MpcDag
 | 
			
		||||
      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      // Call the red-black solver
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
 | 
			
		||||
      guess(src_o,sol_o);
 | 
			
		||||
      Mtmp = sol_o;
 | 
			
		||||
      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
 | 
			
		||||
      // Fionn A2A boolean behavioural control
 | 
			
		||||
      if (subGuess)        sol_o = sol_o-Mtmp;
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even);
 | 
			
		||||
      src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even);
 | 
			
		||||
      _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
     
 | 
			
		||||
      setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
      setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd );
 | 
			
		||||
 | 
			
		||||
      // Verify the unprec residual
 | 
			
		||||
      if ( ! subGuess ) {
 | 
			
		||||
        _Matrix.M(out,resid); 
 | 
			
		||||
        resid = resid-in;
 | 
			
		||||
        RealD ns = norm2(in);
 | 
			
		||||
        RealD nr = norm2(resid);
 | 
			
		||||
 | 
			
		||||
        std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
 | 
			
		||||
      } else {
 | 
			
		||||
        std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }     
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Take a matrix and form a Red Black solver calling a Herm solver
 | 
			
		||||
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class Field> class SchurRedBlackDiagTwoSolve {
 | 
			
		||||
  private:
 | 
			
		||||
    OperatorFunction<Field> & _HermitianRBSolver;
 | 
			
		||||
    int CBfactorise;
 | 
			
		||||
    bool subGuess;
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
    // Wrap the usual normal equations Schur trick
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
  SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  :
 | 
			
		||||
     _HermitianRBSolver(HermitianRBSolver) 
 | 
			
		||||
    { 
 | 
			
		||||
      CBfactorise = 0;
 | 
			
		||||
      subtractGuess(initSubGuess);
 | 
			
		||||
    };
 | 
			
		||||
    void subtractGuess(const bool initSubGuess)
 | 
			
		||||
    {
 | 
			
		||||
      subGuess = initSubGuess;
 | 
			
		||||
    }
 | 
			
		||||
    bool isSubtractGuess(void)
 | 
			
		||||
    {
 | 
			
		||||
      return subGuess;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Matrix>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out){
 | 
			
		||||
      ZeroGuesser<Field> guess;
 | 
			
		||||
      (*this)(_Matrix,in,out,guess);
 | 
			
		||||
    }
 | 
			
		||||
    template<class Matrix,class Guesser>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
 | 
			
		||||
 | 
			
		||||
      // FIXME CGdiagonalMee not implemented virtual function
 | 
			
		||||
      // FIXME use CBfactorise to control schur decomp
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
 
 | 
			
		||||
      Field src_e(grid);
 | 
			
		||||
      Field src_o(grid);
 | 
			
		||||
      Field sol_e(grid);
 | 
			
		||||
      Field sol_o(grid);
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field  Mtmp(grid);
 | 
			
		||||
      Field resid(fgrid);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,src_e,in);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,in);
 | 
			
		||||
      pickCheckerboard(Even,sol_e,out);
 | 
			
		||||
      pickCheckerboard(Odd ,sol_o,out);
 | 
			
		||||
    
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
 | 
			
		||||
      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
 | 
			
		||||
      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
 | 
			
		||||
 | 
			
		||||
      // get the right MpcDag
 | 
			
		||||
      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      // Call the red-black solver
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
 | 
			
		||||
//      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
 | 
			
		||||
      guess(src_o,tmp);
 | 
			
		||||
      Mtmp = tmp;
 | 
			
		||||
      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd);
 | 
			
		||||
      // Fionn A2A boolean behavioural control
 | 
			
		||||
      if (subGuess)      tmp = tmp-Mtmp;
 | 
			
		||||
      _Matrix.MooeeInv(tmp,sol_o);       assert(  sol_o.checkerboard   ==Odd);
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even);
 | 
			
		||||
      src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even);
 | 
			
		||||
      _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
     
 | 
			
		||||
      setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
      setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd );
 | 
			
		||||
 | 
			
		||||
      // Verify the unprec residual
 | 
			
		||||
      if ( ! subGuess ) {
 | 
			
		||||
        _Matrix.M(out,resid); 
 | 
			
		||||
        resid = resid-in;
 | 
			
		||||
        RealD ns = norm2(in);
 | 
			
		||||
        RealD nr = norm2(resid);
 | 
			
		||||
 | 
			
		||||
        std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
 | 
			
		||||
      } else {
 | 
			
		||||
        std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }     
 | 
			
		||||
  };
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Take a matrix and form a Red Black solver calling a Herm solver
 | 
			
		||||
  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class Field> class SchurRedBlackDiagTwoMixed {
 | 
			
		||||
  private:
 | 
			
		||||
    LinearFunction<Field> & _HermitianRBSolver;
 | 
			
		||||
    int CBfactorise;
 | 
			
		||||
    bool subGuess;
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
    // Wrap the usual normal equations Schur trick
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
  SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)  :
 | 
			
		||||
     _HermitianRBSolver(HermitianRBSolver) 
 | 
			
		||||
    { 
 | 
			
		||||
      CBfactorise=0;
 | 
			
		||||
      subtractGuess(initSubGuess);
 | 
			
		||||
    };
 | 
			
		||||
    void subtractGuess(const bool initSubGuess)
 | 
			
		||||
    {
 | 
			
		||||
      subGuess = initSubGuess;
 | 
			
		||||
    }
 | 
			
		||||
    bool isSubtractGuess(void)
 | 
			
		||||
    {
 | 
			
		||||
      return subGuess;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Matrix>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out){
 | 
			
		||||
      ZeroGuesser<Field> guess;
 | 
			
		||||
      (*this)(_Matrix,in,out,guess);
 | 
			
		||||
    }
 | 
			
		||||
    template<class Matrix, class Guesser>
 | 
			
		||||
    void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
 | 
			
		||||
 | 
			
		||||
      // FIXME CGdiagonalMee not implemented virtual function
 | 
			
		||||
      // FIXME use CBfactorise to control schur decomp
 | 
			
		||||
      GridBase *grid = _Matrix.RedBlackGrid();
 | 
			
		||||
      GridBase *fgrid= _Matrix.Grid();
 | 
			
		||||
 | 
			
		||||
      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
 | 
			
		||||
 
 | 
			
		||||
      Field src_e(grid);
 | 
			
		||||
      Field src_o(grid);
 | 
			
		||||
      Field sol_e(grid);
 | 
			
		||||
      Field sol_o(grid);
 | 
			
		||||
      Field   tmp(grid);
 | 
			
		||||
      Field  Mtmp(grid);
 | 
			
		||||
      Field resid(fgrid);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,src_e,in);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,in);
 | 
			
		||||
      pickCheckerboard(Even,sol_e,out);
 | 
			
		||||
      pickCheckerboard(Odd ,sol_o,out);
 | 
			
		||||
    
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
 | 
			
		||||
      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
 | 
			
		||||
      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
 | 
			
		||||
 | 
			
		||||
      // get the right MpcDag
 | 
			
		||||
      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      // Call the red-black solver
 | 
			
		||||
      //////////////////////////////////////////////////////////////
 | 
			
		||||
      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
 | 
			
		||||
//      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
 | 
			
		||||
//      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd);
 | 
			
		||||
      guess(src_o,tmp);
 | 
			
		||||
      Mtmp = tmp;
 | 
			
		||||
      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd);
 | 
			
		||||
      // Fionn A2A boolean behavioural control
 | 
			
		||||
      if (subGuess)      tmp = tmp-Mtmp;
 | 
			
		||||
      _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd);
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
 | 
			
		||||
      ///////////////////////////////////////////////////
 | 
			
		||||
      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even);
 | 
			
		||||
      src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even);
 | 
			
		||||
      _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
     
 | 
			
		||||
      setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even);
 | 
			
		||||
      setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd );
 | 
			
		||||
 | 
			
		||||
      // Verify the unprec residual
 | 
			
		||||
      if ( ! subGuess ) {
 | 
			
		||||
        _Matrix.M(out,resid); 
 | 
			
		||||
        resid = resid-in;
 | 
			
		||||
        RealD ns = norm2(in);
 | 
			
		||||
        RealD nr = norm2(resid);
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid " << std::sqrt(nr / ns) << " nr " << nr << " ns " << ns << std::endl;
 | 
			
		||||
      } else {
 | 
			
		||||
        std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }     
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,125 +0,0 @@
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
#include <fcntl.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
MemoryStats *MemoryProfiler::stats = nullptr;
 | 
			
		||||
bool         MemoryProfiler::debug = false;
 | 
			
		||||
 | 
			
		||||
int PointerCache::victim;
 | 
			
		||||
 | 
			
		||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
 | 
			
		||||
 | 
			
		||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
 | 
			
		||||
 | 
			
		||||
  if (bytes < 4096 ) return ptr;
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
  assert(omp_in_parallel()==0);
 | 
			
		||||
#endif 
 | 
			
		||||
 | 
			
		||||
  void * ret = NULL;
 | 
			
		||||
  int v = -1;
 | 
			
		||||
 | 
			
		||||
  for(int e=0;e<Ncache;e++) {
 | 
			
		||||
    if ( Entries[e].valid==0 ) {
 | 
			
		||||
      v=e; 
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if ( v==-1 ) {
 | 
			
		||||
    v=victim;
 | 
			
		||||
    victim = (victim+1)%Ncache;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if ( Entries[v].valid ) {
 | 
			
		||||
    ret = Entries[v].address;
 | 
			
		||||
    Entries[v].valid = 0;
 | 
			
		||||
    Entries[v].address = NULL;
 | 
			
		||||
    Entries[v].bytes = 0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Entries[v].address=ptr;
 | 
			
		||||
  Entries[v].bytes  =bytes;
 | 
			
		||||
  Entries[v].valid  =1;
 | 
			
		||||
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void *PointerCache::Lookup(size_t bytes) {
 | 
			
		||||
 | 
			
		||||
 if (bytes < 4096 ) return NULL;
 | 
			
		||||
 | 
			
		||||
#ifdef _OPENMP
 | 
			
		||||
  assert(omp_in_parallel()==0);
 | 
			
		||||
#endif 
 | 
			
		||||
 | 
			
		||||
  for(int e=0;e<Ncache;e++){
 | 
			
		||||
    if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
 | 
			
		||||
      Entries[e].valid = 0;
 | 
			
		||||
      return Entries[e].address;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void check_huge_pages(void *Buf,uint64_t BYTES)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
  int fd = open("/proc/self/pagemap", O_RDONLY);
 | 
			
		||||
  assert(fd >= 0);
 | 
			
		||||
  const int page_size = 4096;
 | 
			
		||||
  uint64_t virt_pfn = (uint64_t)Buf / page_size;
 | 
			
		||||
  off_t offset = sizeof(uint64_t) * virt_pfn;
 | 
			
		||||
  uint64_t npages = (BYTES + page_size-1) / page_size;
 | 
			
		||||
  uint64_t pagedata[npages];
 | 
			
		||||
  uint64_t ret = lseek(fd, offset, SEEK_SET);
 | 
			
		||||
  assert(ret == offset);
 | 
			
		||||
  ret = ::read(fd, pagedata, sizeof(uint64_t)*npages);
 | 
			
		||||
  assert(ret == sizeof(uint64_t) * npages);
 | 
			
		||||
  int nhugepages = npages / 512;
 | 
			
		||||
  int n4ktotal, nnothuge;
 | 
			
		||||
  n4ktotal = 0;
 | 
			
		||||
  nnothuge = 0;
 | 
			
		||||
  for (int i = 0; i < nhugepages; ++i) {
 | 
			
		||||
    uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size;
 | 
			
		||||
    for (int j = 0; j < 512; ++j) {
 | 
			
		||||
      uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size;
 | 
			
		||||
      ++n4ktotal;
 | 
			
		||||
      if (pageaddr != baseaddr + j * page_size)
 | 
			
		||||
	++nnothuge;
 | 
			
		||||
      }
 | 
			
		||||
  }
 | 
			
		||||
  int rank = CartesianCommunicator::RankWorld();
 | 
			
		||||
  printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::string sizeString(const size_t bytes)
 | 
			
		||||
{
 | 
			
		||||
  constexpr unsigned int bufSize = 256;
 | 
			
		||||
  const char             *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
 | 
			
		||||
  char                   buf[256];
 | 
			
		||||
  size_t                 s     = 0;
 | 
			
		||||
  double                 count = bytes;
 | 
			
		||||
  
 | 
			
		||||
  while (count >= 1024 && s < 7)
 | 
			
		||||
  {
 | 
			
		||||
      s++;
 | 
			
		||||
      count /= 1024;
 | 
			
		||||
  }
 | 
			
		||||
  if (count - floor(count) == 0.0)
 | 
			
		||||
  {
 | 
			
		||||
      snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
      snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return std::string(buf);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -1,174 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cartesian/Cartesian_full.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_FULL_H
 | 
			
		||||
#define GRID_CARTESIAN_FULL_H
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
    
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Grid Support.
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GridCartesian: public GridBase {
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
    int dummy;
 | 
			
		||||
    virtual int  CheckerBoardFromOindexTable (int Oindex) {
 | 
			
		||||
      return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int  CheckerBoardFromOindex (int Oindex)
 | 
			
		||||
    {
 | 
			
		||||
      return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoarded(int dim){
 | 
			
		||||
      return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoard(const std::vector<int> &site){
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoardDestination(int cb,int shift,int dim){
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift, int ocb){
 | 
			
		||||
      return shift;
 | 
			
		||||
    }
 | 
			
		||||
    virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
 | 
			
		||||
      return shift;
 | 
			
		||||
    }
 | 
			
		||||
    /////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Constructor takes a parent grid and possibly subdivides communicator.
 | 
			
		||||
    /////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    GridCartesian(const std::vector<int> &dimensions,
 | 
			
		||||
		  const std::vector<int> &simd_layout,
 | 
			
		||||
		  const std::vector<int> &processor_grid,
 | 
			
		||||
		  const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
 | 
			
		||||
    {
 | 
			
		||||
      Init(dimensions,simd_layout,processor_grid);
 | 
			
		||||
    }
 | 
			
		||||
    GridCartesian(const std::vector<int> &dimensions,
 | 
			
		||||
		  const std::vector<int> &simd_layout,
 | 
			
		||||
		  const std::vector<int> &processor_grid,
 | 
			
		||||
		  const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
 | 
			
		||||
    {
 | 
			
		||||
      Init(dimensions,simd_layout,processor_grid);
 | 
			
		||||
    }
 | 
			
		||||
    /////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Construct from comm world
 | 
			
		||||
    /////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    GridCartesian(const std::vector<int> &dimensions,
 | 
			
		||||
		  const std::vector<int> &simd_layout,
 | 
			
		||||
		  const std::vector<int> &processor_grid) : GridBase(processor_grid)
 | 
			
		||||
    {
 | 
			
		||||
      Init(dimensions,simd_layout,processor_grid);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    virtual ~GridCartesian() = default;
 | 
			
		||||
 | 
			
		||||
    void Init(const std::vector<int> &dimensions,
 | 
			
		||||
	      const std::vector<int> &simd_layout,
 | 
			
		||||
	      const std::vector<int> &processor_grid)
 | 
			
		||||
    {
 | 
			
		||||
      ///////////////////////
 | 
			
		||||
      // Grid information
 | 
			
		||||
      ///////////////////////
 | 
			
		||||
      _isCheckerBoarded = false;
 | 
			
		||||
      _ndimension = dimensions.size();
 | 
			
		||||
 | 
			
		||||
      _fdimensions.resize(_ndimension);
 | 
			
		||||
      _gdimensions.resize(_ndimension);
 | 
			
		||||
      _ldimensions.resize(_ndimension);
 | 
			
		||||
      _rdimensions.resize(_ndimension);
 | 
			
		||||
      _simd_layout.resize(_ndimension);
 | 
			
		||||
      _lstart.resize(_ndimension);
 | 
			
		||||
      _lend.resize(_ndimension);
 | 
			
		||||
 | 
			
		||||
      _ostride.resize(_ndimension);
 | 
			
		||||
      _istride.resize(_ndimension);
 | 
			
		||||
 | 
			
		||||
      _fsites = _gsites = _osites = _isites = 1;
 | 
			
		||||
 | 
			
		||||
      for (int d = 0; d < _ndimension; d++)
 | 
			
		||||
      {
 | 
			
		||||
        _fdimensions[d] = dimensions[d];   // Global dimensions
 | 
			
		||||
        _gdimensions[d] = _fdimensions[d]; // Global dimensions
 | 
			
		||||
        _simd_layout[d] = simd_layout[d];
 | 
			
		||||
        _fsites = _fsites * _fdimensions[d];
 | 
			
		||||
        _gsites = _gsites * _gdimensions[d];
 | 
			
		||||
 | 
			
		||||
        // Use a reduced simd grid
 | 
			
		||||
        _ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
 | 
			
		||||
        //std::cout << _ldimensions[d] << "  " << _gdimensions[d] << "  " << _processors[d] << std::endl;
 | 
			
		||||
        assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
 | 
			
		||||
 | 
			
		||||
        _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
 | 
			
		||||
        assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
 | 
			
		||||
 | 
			
		||||
        _lstart[d] = _processor_coor[d] * _ldimensions[d];
 | 
			
		||||
        _lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
 | 
			
		||||
        _osites *= _rdimensions[d];
 | 
			
		||||
        _isites *= _simd_layout[d];
 | 
			
		||||
 | 
			
		||||
        // Addressing support
 | 
			
		||||
        if (d == 0)
 | 
			
		||||
        {
 | 
			
		||||
          _ostride[d] = 1;
 | 
			
		||||
          _istride[d] = 1;
 | 
			
		||||
        }
 | 
			
		||||
        else
 | 
			
		||||
        {
 | 
			
		||||
          _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
 | 
			
		||||
          _istride[d] = _istride[d - 1] * _simd_layout[d - 1];
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      ///////////////////////
 | 
			
		||||
      // subplane information
 | 
			
		||||
      ///////////////////////
 | 
			
		||||
      _slice_block.resize(_ndimension);
 | 
			
		||||
      _slice_stride.resize(_ndimension);
 | 
			
		||||
      _slice_nblock.resize(_ndimension);
 | 
			
		||||
 | 
			
		||||
      int block = 1;
 | 
			
		||||
      int nblock = 1;
 | 
			
		||||
      for (int d = 0; d < _ndimension; d++)
 | 
			
		||||
        nblock *= _rdimensions[d];
 | 
			
		||||
 | 
			
		||||
      for (int d = 0; d < _ndimension; d++)
 | 
			
		||||
      {
 | 
			
		||||
        nblock /= _rdimensions[d];
 | 
			
		||||
        _slice_block[d] = block;
 | 
			
		||||
        _slice_stride[d] = _ostride[d] * _rdimensions[d];
 | 
			
		||||
        _slice_nblock[d] = nblock;
 | 
			
		||||
        block = block * _rdimensions[d];
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,514 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_mpi.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
#include <Grid/communicator/SharedMemory.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
Grid_MPI_Comm       CartesianCommunicator::communicator_world;
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// First initialise of comms system
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char ***argv) 
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  int flag;
 | 
			
		||||
  int provided;
 | 
			
		||||
 | 
			
		||||
  MPI_Initialized(&flag); // needed to coexist with other libs apparently
 | 
			
		||||
  if ( !flag ) {
 | 
			
		||||
    MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
 | 
			
		||||
    //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
 | 
			
		||||
    if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
 | 
			
		||||
        (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
 | 
			
		||||
      assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Grid_quiesce_nodes();
 | 
			
		||||
 | 
			
		||||
  // Never clean up as done once.
 | 
			
		||||
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
 | 
			
		||||
 | 
			
		||||
  GlobalSharedMemory::Init(communicator_world);
 | 
			
		||||
  GlobalSharedMemory::SharedMemoryAllocate(
 | 
			
		||||
		   GlobalSharedMemory::MAX_MPI_SHM_BYTES,
 | 
			
		||||
		   GlobalSharedMemory::Hugepages);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Use cartesian communicators now even in MPI3
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
 | 
			
		||||
{
 | 
			
		||||
  int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  int rank;
 | 
			
		||||
  int ierr=MPI_Cart_rank  (communicator, &coor[0], &rank);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
  return rank;
 | 
			
		||||
}
 | 
			
		||||
void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  coor.resize(_ndimension);
 | 
			
		||||
  int ierr=MPI_Cart_coords  (communicator, rank, _ndimension,&coor[0]);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Initialises from communicator_world
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) 
 | 
			
		||||
{
 | 
			
		||||
  MPI_Comm optimal_comm;
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  // Remap using the shared memory optimising routine
 | 
			
		||||
  // The remap creates a comm which must be freed
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  GlobalSharedMemory::OptimalCommunicator    (processors,optimal_comm);
 | 
			
		||||
  InitFromMPICommunicator(processors,optimal_comm);
 | 
			
		||||
  SetCommunicator(optimal_comm);
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  // Free the temp communicator
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  MPI_Comm_free(&optimal_comm);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////
 | 
			
		||||
// Try to subdivide communicator
 | 
			
		||||
//////////////////////////////////
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)    
 | 
			
		||||
{
 | 
			
		||||
  _ndimension = processors.size();
 | 
			
		||||
 | 
			
		||||
  int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
 | 
			
		||||
  std::vector<int> parent_processor_coor(_ndimension,0);
 | 
			
		||||
  std::vector<int> parent_processors    (_ndimension,1);
 | 
			
		||||
 | 
			
		||||
  // Can make 5d grid from 4d etc...
 | 
			
		||||
  int pad = _ndimension-parent_ndimension;
 | 
			
		||||
  for(int d=0;d<parent_ndimension;d++){
 | 
			
		||||
    parent_processor_coor[pad+d]=parent._processor_coor[d];
 | 
			
		||||
    parent_processors    [pad+d]=parent._processors[d];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // split the communicator
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //  int Nparent = parent._processors ; 
 | 
			
		||||
  //  std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
 | 
			
		||||
  int Nparent;
 | 
			
		||||
  MPI_Comm_size(parent.communicator,&Nparent);
 | 
			
		||||
  //  std::cout << " Parent size  "<<Nparent <<std::endl;
 | 
			
		||||
 | 
			
		||||
  int childsize=1;
 | 
			
		||||
  for(int d=0;d<processors.size();d++) {
 | 
			
		||||
    childsize *= processors[d];
 | 
			
		||||
  }
 | 
			
		||||
  int Nchild = Nparent/childsize;
 | 
			
		||||
  assert (childsize * Nchild == Nparent);
 | 
			
		||||
 | 
			
		||||
  //  std::cout << " child size  "<<childsize <<std::endl;
 | 
			
		||||
 | 
			
		||||
  std::vector<int> ccoor(_ndimension); // coor within subcommunicator
 | 
			
		||||
  std::vector<int> scoor(_ndimension); // coor of split within parent
 | 
			
		||||
  std::vector<int> ssize(_ndimension); // coor of split within parent
 | 
			
		||||
 | 
			
		||||
  for(int d=0;d<_ndimension;d++){
 | 
			
		||||
    ccoor[d] = parent_processor_coor[d] % processors[d];
 | 
			
		||||
    scoor[d] = parent_processor_coor[d] / processors[d];
 | 
			
		||||
    ssize[d] = parent_processors[d]     / processors[d];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
 | 
			
		||||
  int crank;  
 | 
			
		||||
  // Mpi uses the reverse Lexico convention to us; so reversed routines called
 | 
			
		||||
  Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
 | 
			
		||||
  Lexicographic::IndexFromCoorReversed(scoor,srank,ssize);      // ssize is the number of split grids
 | 
			
		||||
 | 
			
		||||
  MPI_Comm comm_split;
 | 
			
		||||
  if ( Nchild > 1 ) { 
 | 
			
		||||
 | 
			
		||||
    if(0){
 | 
			
		||||
      std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
 | 
			
		||||
      std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"]    ";
 | 
			
		||||
      for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processors[d] << " ";
 | 
			
		||||
      std::cout<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
      std::cout << GridLogMessage<<" child grid["<< _ndimension <<"]    ";
 | 
			
		||||
      for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " ";
 | 
			
		||||
      std::cout<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
      std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"]    ";
 | 
			
		||||
      for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processor_coor[d] << " ";
 | 
			
		||||
      std::cout<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
      std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"]    ";
 | 
			
		||||
      for(int d=0;d<processors.size();d++)  std::cout << scoor[d] << " ";
 | 
			
		||||
      std::cout<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
      std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"]    ";
 | 
			
		||||
      for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " ";
 | 
			
		||||
      std::cout<<std::endl;
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
      // Declare victory
 | 
			
		||||
      //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
      std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
 | 
			
		||||
		<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
 | 
			
		||||
      std::cout << " Split communicator " <<comm_split <<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Split the communicator
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
    int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
 | 
			
		||||
  } else {
 | 
			
		||||
    srank = 0;
 | 
			
		||||
    int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Set up from the new split communicator
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  InitFromMPICommunicator(processors,comm_split);
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Take the right SHM buffers
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  SetCommunicator(comm_split);
 | 
			
		||||
  
 | 
			
		||||
  ///////////////////////////////////////////////
 | 
			
		||||
  // Free the temp communicator 
 | 
			
		||||
  ///////////////////////////////////////////////
 | 
			
		||||
  MPI_Comm_free(&comm_split);
 | 
			
		||||
 | 
			
		||||
  if(0){ 
 | 
			
		||||
    std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
 | 
			
		||||
    for(int d=0;d<processors.size();d++){
 | 
			
		||||
      std::cout << d<< " " << _processor_coor[d] <<" " <<  ccoor[d]<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  for(int d=0;d<processors.size();d++){
 | 
			
		||||
    assert(_processor_coor[d] == ccoor[d] );
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
 | 
			
		||||
{
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  // Creates communicator, and the communicator_halo
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  _ndimension = processors.size();
 | 
			
		||||
  _processor_coor.resize(_ndimension);
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////
 | 
			
		||||
  // Count the requested nodes
 | 
			
		||||
  /////////////////////////////////
 | 
			
		||||
  _Nprocessors=1;
 | 
			
		||||
  _processors = processors;
 | 
			
		||||
  for(int i=0;i<_ndimension;i++){
 | 
			
		||||
    _Nprocessors*=_processors[i];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::vector<int> periodic(_ndimension,1);
 | 
			
		||||
  MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
 | 
			
		||||
  MPI_Comm_rank(communicator,&_processor);
 | 
			
		||||
  MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
 | 
			
		||||
 | 
			
		||||
  if ( 0 && (communicator_base != communicator_world) ) {
 | 
			
		||||
    std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
 | 
			
		||||
    std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
 | 
			
		||||
    for(int d=0;d<_processors.size();d++){
 | 
			
		||||
      std::cout << _processor_coor[d]<<" ";
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int Size;
 | 
			
		||||
  MPI_Comm_size(communicator,&Size);
 | 
			
		||||
 | 
			
		||||
  communicator_halo.resize (2*_ndimension);
 | 
			
		||||
  for(int i=0;i<_ndimension*2;i++){
 | 
			
		||||
    MPI_Comm_dup(communicator,&communicator_halo[i]);
 | 
			
		||||
  }
 | 
			
		||||
  assert(Size==_Nprocessors);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
CartesianCommunicator::~CartesianCommunicator()
 | 
			
		||||
{
 | 
			
		||||
  int MPI_is_finalised;
 | 
			
		||||
  MPI_Finalized(&MPI_is_finalised);
 | 
			
		||||
  if (communicator && !MPI_is_finalised) {
 | 
			
		||||
    MPI_Comm_free(&communicator);
 | 
			
		||||
    for(int i=0;i<communicator_halo.size();i++){
 | 
			
		||||
      MPI_Comm_free(&communicator_halo[i]);
 | 
			
		||||
    }
 | 
			
		||||
  }  
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalXOR(uint32_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalXOR(uint64_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(float &f){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(float *f,int N)
 | 
			
		||||
{
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(double &d)
 | 
			
		||||
{
 | 
			
		||||
  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(double *d,int N)
 | 
			
		||||
{
 | 
			
		||||
  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
					   int dest,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int from,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<CommsRequest_t> reqs(0);
 | 
			
		||||
  //    unsigned long  xcrc = crc32(0L, Z_NULL, 0);
 | 
			
		||||
  //    unsigned long  rcrc = crc32(0L, Z_NULL, 0);
 | 
			
		||||
  //    xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
 | 
			
		||||
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromComplete(reqs);
 | 
			
		||||
  //    rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
 | 
			
		||||
  //    printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int sender,
 | 
			
		||||
					   int receiver,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Status stat;
 | 
			
		||||
  assert(sender != receiver);
 | 
			
		||||
  int tag = sender;
 | 
			
		||||
  if ( _processor == sender ) {
 | 
			
		||||
    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
 | 
			
		||||
  }
 | 
			
		||||
  if ( _processor == receiver ) { 
 | 
			
		||||
    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int myrank = _processor;
 | 
			
		||||
  int ierr;
 | 
			
		||||
 | 
			
		||||
  if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { 
 | 
			
		||||
    MPI_Request xrq;
 | 
			
		||||
    MPI_Request rrq;
 | 
			
		||||
 | 
			
		||||
    ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
			
		||||
    ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
    
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
    list.push_back(xrq);
 | 
			
		||||
    list.push_back(rrq);
 | 
			
		||||
  } else { 
 | 
			
		||||
    // Give the CPU to MPI immediately; can use threads to overlap optionally
 | 
			
		||||
    ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
 | 
			
		||||
		      recv,bytes,MPI_CHAR,from, from,
 | 
			
		||||
		      communicator,MPI_STATUS_IGNORE);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
 | 
			
		||||
						     int dest,
 | 
			
		||||
						     void *recv,
 | 
			
		||||
						     int from,
 | 
			
		||||
						     int bytes,int dir)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<CommsRequest_t> list;
 | 
			
		||||
  double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
 | 
			
		||||
  StencilSendToRecvFromComplete(list,dir);
 | 
			
		||||
  return offbytes;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
							 void *xmit,
 | 
			
		||||
							 int dest,
 | 
			
		||||
							 void *recv,
 | 
			
		||||
							 int from,
 | 
			
		||||
							 int bytes,int dir)
 | 
			
		||||
{
 | 
			
		||||
  int ncomm  =communicator_halo.size(); 
 | 
			
		||||
  int commdir=dir%ncomm;
 | 
			
		||||
 | 
			
		||||
  MPI_Request xrq;
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
 | 
			
		||||
  int ierr;
 | 
			
		||||
  int gdest = ShmRanks[dest];
 | 
			
		||||
  int gfrom = ShmRanks[from];
 | 
			
		||||
  int gme   = ShmRanks[_processor];
 | 
			
		||||
 | 
			
		||||
  assert(dest != _processor);
 | 
			
		||||
  assert(from != _processor);
 | 
			
		||||
  assert(gme  == ShmRank);
 | 
			
		||||
  double off_node_bytes=0.0;
 | 
			
		||||
 | 
			
		||||
  if ( gfrom ==MPI_UNDEFINED) {
 | 
			
		||||
    ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
    list.push_back(rrq);
 | 
			
		||||
    off_node_bytes+=bytes;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if ( gdest == MPI_UNDEFINED ) {
 | 
			
		||||
    ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
 | 
			
		||||
    assert(ierr==0);
 | 
			
		||||
    list.push_back(xrq);
 | 
			
		||||
    off_node_bytes+=bytes;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if ( CommunicatorPolicy == CommunicatorPolicySequential ) { 
 | 
			
		||||
    this->StencilSendToRecvFromComplete(list,dir);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return off_node_bytes;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
 | 
			
		||||
{
 | 
			
		||||
  SendToRecvFromComplete(waitall);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::StencilBarrier(void)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Barrier  (ShmComm);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  int nreq=list.size();
 | 
			
		||||
 | 
			
		||||
  if (nreq==0) return;
 | 
			
		||||
 | 
			
		||||
  std::vector<MPI_Status> status(nreq);
 | 
			
		||||
  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
  list.resize(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::Barrier(void)
 | 
			
		||||
{
 | 
			
		||||
  int ierr = MPI_Barrier(communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int ierr=MPI_Bcast(data,
 | 
			
		||||
		     bytes,
 | 
			
		||||
		     MPI_BYTE,
 | 
			
		||||
		     root,
 | 
			
		||||
		     communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
int CartesianCommunicator::RankWorld(void){ 
 | 
			
		||||
  int r; 
 | 
			
		||||
  MPI_Comm_rank(communicator_world,&r);
 | 
			
		||||
  return r;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int ierr= MPI_Bcast(data,
 | 
			
		||||
		      bytes,
 | 
			
		||||
		      MPI_BYTE,
 | 
			
		||||
		      root,
 | 
			
		||||
		      communicator_world);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,uint64_t bytes)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<int> row(_ndimension,1);
 | 
			
		||||
  assert(dim>=0 && dim<_ndimension);
 | 
			
		||||
 | 
			
		||||
  //  Split the communicator
 | 
			
		||||
  row[dim] = _processors[dim];
 | 
			
		||||
 | 
			
		||||
  int me;
 | 
			
		||||
  CartesianCommunicator Comm(row,*this,me);
 | 
			
		||||
  Comm.AllToAll(in,out,words,bytes);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes)
 | 
			
		||||
{
 | 
			
		||||
  // MPI is a pain and uses "int" arguments
 | 
			
		||||
  // 64*64*64*128*16 == 500Million elements of data.
 | 
			
		||||
  // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
 | 
			
		||||
  // (Turns up on 32^3 x 64 Gparity too)
 | 
			
		||||
  MPI_Datatype object;
 | 
			
		||||
  int iwords; 
 | 
			
		||||
  int ibytes;
 | 
			
		||||
  iwords = words;
 | 
			
		||||
  ibytes = bytes;
 | 
			
		||||
  assert(words == iwords); // safe to cast to int ?
 | 
			
		||||
  assert(bytes == ibytes); // safe to cast to int ?
 | 
			
		||||
  MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
 | 
			
		||||
  MPI_Type_commit(&object);
 | 
			
		||||
  MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
 | 
			
		||||
  MPI_Type_free(&object);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1,92 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/SharedMemory.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
// static data
 | 
			
		||||
 | 
			
		||||
uint64_t            GlobalSharedMemory::MAX_MPI_SHM_BYTES   = 1024LL*1024LL*1024LL; 
 | 
			
		||||
int                 GlobalSharedMemory::Hugepages = 0;
 | 
			
		||||
int                 GlobalSharedMemory::_ShmSetup;
 | 
			
		||||
int                 GlobalSharedMemory::_ShmAlloc;
 | 
			
		||||
uint64_t            GlobalSharedMemory::_ShmAllocBytes;
 | 
			
		||||
 | 
			
		||||
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
 | 
			
		||||
 | 
			
		||||
Grid_MPI_Comm       GlobalSharedMemory::WorldShmComm;
 | 
			
		||||
int                 GlobalSharedMemory::WorldShmRank;
 | 
			
		||||
int                 GlobalSharedMemory::WorldShmSize;
 | 
			
		||||
std::vector<int>    GlobalSharedMemory::WorldShmRanks;
 | 
			
		||||
 | 
			
		||||
Grid_MPI_Comm       GlobalSharedMemory::WorldComm;
 | 
			
		||||
int                 GlobalSharedMemory::WorldSize;
 | 
			
		||||
int                 GlobalSharedMemory::WorldRank;
 | 
			
		||||
 | 
			
		||||
int                 GlobalSharedMemory::WorldNodes;
 | 
			
		||||
int                 GlobalSharedMemory::WorldNode;
 | 
			
		||||
 | 
			
		||||
void GlobalSharedMemory::SharedMemoryFree(void)
 | 
			
		||||
{
 | 
			
		||||
  assert(_ShmAlloc);
 | 
			
		||||
  assert(_ShmAllocBytes>0);
 | 
			
		||||
  for(int r=0;r<WorldShmSize;r++){
 | 
			
		||||
    munmap(WorldShmCommBufs[r],_ShmAllocBytes);
 | 
			
		||||
  }
 | 
			
		||||
  _ShmAlloc = 0;
 | 
			
		||||
  _ShmAllocBytes = 0;
 | 
			
		||||
}
 | 
			
		||||
/////////////////////////////////
 | 
			
		||||
// Alloc, free shmem region
 | 
			
		||||
/////////////////////////////////
 | 
			
		||||
void *SharedMemory::ShmBufferMalloc(size_t bytes){
 | 
			
		||||
  //  bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
 | 
			
		||||
  void *ptr = (void *)heap_top;
 | 
			
		||||
  heap_top  += bytes;
 | 
			
		||||
  heap_bytes+= bytes;
 | 
			
		||||
  if (heap_bytes >= heap_size) {
 | 
			
		||||
    std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
 | 
			
		||||
    std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
 | 
			
		||||
    std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
 | 
			
		||||
    assert(heap_bytes<heap_size);
 | 
			
		||||
  }
 | 
			
		||||
  return ptr;
 | 
			
		||||
}
 | 
			
		||||
void SharedMemory::ShmBufferFreeAll(void) { 
 | 
			
		||||
  heap_top  =(size_t)ShmBufferSelf();
 | 
			
		||||
  heap_bytes=0;
 | 
			
		||||
}
 | 
			
		||||
void *SharedMemory::ShmBufferSelf(void)
 | 
			
		||||
{
 | 
			
		||||
  return ShmCommBufs[ShmRank];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -1,165 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/SharedMemory.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// TODO
 | 
			
		||||
// 1) move includes into SharedMemory.cc
 | 
			
		||||
//
 | 
			
		||||
// 2) split shared memory into a) optimal communicator creation from comm world
 | 
			
		||||
// 
 | 
			
		||||
//                             b) shared memory buffers container
 | 
			
		||||
//                                -- static globally shared; init once
 | 
			
		||||
//                                -- per instance set of buffers.
 | 
			
		||||
//                                   
 | 
			
		||||
 | 
			
		||||
#pragma once 
 | 
			
		||||
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
 | 
			
		||||
#if defined (GRID_COMMS_MPI3) 
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#endif 
 | 
			
		||||
#include <semaphore.h>
 | 
			
		||||
#include <fcntl.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
#include <sys/types.h>
 | 
			
		||||
#include <sys/ipc.h>
 | 
			
		||||
#include <sys/shm.h>
 | 
			
		||||
#include <sys/mman.h>
 | 
			
		||||
#include <zlib.h>
 | 
			
		||||
#ifdef HAVE_NUMAIF_H
 | 
			
		||||
#include <numaif.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
#if defined (GRID_COMMS_MPI3) 
 | 
			
		||||
  typedef MPI_Comm    Grid_MPI_Comm;
 | 
			
		||||
  typedef MPI_Request CommsRequest_t;
 | 
			
		||||
#else 
 | 
			
		||||
  typedef int CommsRequest_t;
 | 
			
		||||
  typedef int Grid_MPI_Comm;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
class GlobalSharedMemory {
 | 
			
		||||
 private:
 | 
			
		||||
  static const int     MAXLOG2RANKSPERNODE = 16;            
 | 
			
		||||
 | 
			
		||||
  // Init once lock on the buffer allocation
 | 
			
		||||
  static int      _ShmSetup;
 | 
			
		||||
  static int      _ShmAlloc;
 | 
			
		||||
  static uint64_t _ShmAllocBytes;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  static int      ShmSetup(void)      { return _ShmSetup; }
 | 
			
		||||
  static int      ShmAlloc(void)      { return _ShmAlloc; }
 | 
			
		||||
  static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; }
 | 
			
		||||
  static uint64_t      MAX_MPI_SHM_BYTES;
 | 
			
		||||
  static int           Hugepages;
 | 
			
		||||
 | 
			
		||||
  static std::vector<void *> WorldShmCommBufs;
 | 
			
		||||
 | 
			
		||||
  static Grid_MPI_Comm WorldComm;
 | 
			
		||||
  static int           WorldRank;
 | 
			
		||||
  static int           WorldSize;
 | 
			
		||||
 | 
			
		||||
  static Grid_MPI_Comm WorldShmComm;
 | 
			
		||||
  static int           WorldShmRank;
 | 
			
		||||
  static int           WorldShmSize;
 | 
			
		||||
 | 
			
		||||
  static int           WorldNodes;
 | 
			
		||||
  static int           WorldNode;
 | 
			
		||||
 | 
			
		||||
  static std::vector<int>  WorldShmRanks;
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Create an optimal reordered communicator that makes MPI_Cart_create get it right
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
 | 
			
		||||
  static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  // Provide shared memory facilities off comm world
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  static void SharedMemoryAllocate(uint64_t bytes, int flags);
 | 
			
		||||
  static void SharedMemoryFree(void);
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
//////////////////////////////
 | 
			
		||||
// one per communicator
 | 
			
		||||
//////////////////////////////
 | 
			
		||||
class SharedMemory 
 | 
			
		||||
{
 | 
			
		||||
 private:
 | 
			
		||||
  static const int     MAXLOG2RANKSPERNODE = 16;            
 | 
			
		||||
 | 
			
		||||
  size_t heap_top;
 | 
			
		||||
  size_t heap_bytes;
 | 
			
		||||
  size_t heap_size;
 | 
			
		||||
 | 
			
		||||
 protected:
 | 
			
		||||
 | 
			
		||||
  Grid_MPI_Comm    ShmComm; // for barriers
 | 
			
		||||
  int    ShmRank; 
 | 
			
		||||
  int    ShmSize;
 | 
			
		||||
  std::vector<void *> ShmCommBufs;
 | 
			
		||||
  std::vector<int>    ShmRanks;// Mapping comm ranks to Shm ranks
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  SharedMemory() {};
 | 
			
		||||
  ~SharedMemory();
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // set the buffers & sizes
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  void SetCommunicator(Grid_MPI_Comm comm);
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // For this instance ; disjoint buffer sets between splits if split grid
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  void ShmBarrier(void); 
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  // Call on any instance
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  void SharedMemoryTest(void);
 | 
			
		||||
  void *ShmBufferSelf(void);
 | 
			
		||||
  void *ShmBuffer    (int rank);
 | 
			
		||||
  void *ShmBufferTranslate(int rank,void * local_p);
 | 
			
		||||
  void *ShmBufferMalloc(size_t bytes);
 | 
			
		||||
  void  ShmBufferFreeAll(void) ;
 | 
			
		||||
  
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Make info on Nodes & ranks and Shared memory available
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  int NodeCount(void) { return GlobalSharedMemory::WorldNodes;};
 | 
			
		||||
  int RankCount(void) { return GlobalSharedMemory::WorldSize;};
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -1,651 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/SharedMemory.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
#include <pwd.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
/*Construct from an MPI communicator*/
 | 
			
		||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
 | 
			
		||||
{
 | 
			
		||||
  assert(_ShmSetup==0);
 | 
			
		||||
  WorldComm = comm;
 | 
			
		||||
  MPI_Comm_rank(WorldComm,&WorldRank);
 | 
			
		||||
  MPI_Comm_size(WorldComm,&WorldSize);
 | 
			
		||||
  // WorldComm, WorldSize, WorldRank
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Split into groups that can share memory
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm);
 | 
			
		||||
  MPI_Comm_rank(WorldShmComm     ,&WorldShmRank);
 | 
			
		||||
  MPI_Comm_size(WorldShmComm     ,&WorldShmSize);
 | 
			
		||||
  // WorldShmComm, WorldShmSize, WorldShmRank
 | 
			
		||||
 | 
			
		||||
  // WorldNodes
 | 
			
		||||
  WorldNodes = WorldSize/WorldShmSize;
 | 
			
		||||
  assert( (WorldNodes * WorldShmSize) == WorldSize );
 | 
			
		||||
 | 
			
		||||
  // FIXME: Check all WorldShmSize are the same ?
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // find world ranks in our SHM group (i.e. which ranks are on our node)
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Group WorldGroup, ShmGroup;
 | 
			
		||||
  MPI_Comm_group (WorldComm, &WorldGroup); 
 | 
			
		||||
  MPI_Comm_group (WorldShmComm, &ShmGroup);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> world_ranks(WorldSize);   for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
 | 
			
		||||
 | 
			
		||||
  WorldShmRanks.resize(WorldSize); 
 | 
			
		||||
  MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &WorldShmRanks[0]); 
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Identify who is in my group and nominate the leader
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  int g=0;
 | 
			
		||||
  std::vector<int> MyGroup;
 | 
			
		||||
  MyGroup.resize(WorldShmSize);
 | 
			
		||||
  for(int rank=0;rank<WorldSize;rank++){
 | 
			
		||||
    if(WorldShmRanks[rank]!=MPI_UNDEFINED){
 | 
			
		||||
      assert(g<WorldShmSize);
 | 
			
		||||
      MyGroup[g++] = rank;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
 | 
			
		||||
  int myleader = MyGroup[0];
 | 
			
		||||
  
 | 
			
		||||
  std::vector<int> leaders_1hot(WorldSize,0);
 | 
			
		||||
  std::vector<int> leaders_group(WorldNodes,0);
 | 
			
		||||
  leaders_1hot [ myleader ] = 1;
 | 
			
		||||
    
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  // global sum leaders over comm world
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  // find the group leaders world rank
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  int group=0;
 | 
			
		||||
  for(int l=0;l<WorldSize;l++){
 | 
			
		||||
    if(leaders_1hot[l]){
 | 
			
		||||
      leaders_group[group++] = l;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Identify the node of the group in which I (and my leader) live
 | 
			
		||||
  ///////////////////////////////////////////////////////////////////
 | 
			
		||||
  WorldNode=-1;
 | 
			
		||||
  for(int g=0;g<WorldNodes;g++){
 | 
			
		||||
    if (myleader == leaders_group[g]){
 | 
			
		||||
      WorldNode=g;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  assert(WorldNode!=-1);
 | 
			
		||||
  _ShmSetup=1;
 | 
			
		||||
}
 | 
			
		||||
// Gray encode support 
 | 
			
		||||
int BinaryToGray (int  binary) {
 | 
			
		||||
  int gray = (binary>>1)^binary;
 | 
			
		||||
  return gray;
 | 
			
		||||
}
 | 
			
		||||
int Log2Size(int TwoToPower,int MAXLOG2)
 | 
			
		||||
{
 | 
			
		||||
  int log2size = -1;
 | 
			
		||||
  for(int i=0;i<=MAXLOG2;i++){
 | 
			
		||||
    if ( (0x1<<i) == TwoToPower ) {
 | 
			
		||||
      log2size = i;
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return log2size;
 | 
			
		||||
}
 | 
			
		||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
 | 
			
		||||
{
 | 
			
		||||
#ifdef HYPERCUBE
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Assert power of two shm_size.
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
 | 
			
		||||
  assert(log2size != -1);
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Identify the hypercube coordinate of this node using hostname
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // n runs 0...7 9...16 18...25 27...34     (8*4)  5 bits
 | 
			
		||||
  // i runs 0..7                                    3 bits
 | 
			
		||||
  // r runs 0..3                                    2 bits
 | 
			
		||||
  // 2^10 = 1024 nodes
 | 
			
		||||
  const int maxhdim = 10; 
 | 
			
		||||
  std::vector<int> HyperCubeCoords(maxhdim,0);
 | 
			
		||||
  std::vector<int> RootHyperCubeCoords(maxhdim,0);
 | 
			
		||||
  int R;
 | 
			
		||||
  int I;
 | 
			
		||||
  int N;
 | 
			
		||||
  const int namelen = _POSIX_HOST_NAME_MAX;
 | 
			
		||||
  char name[namelen];
 | 
			
		||||
 | 
			
		||||
  // Parse ICE-XA hostname to get hypercube location
 | 
			
		||||
  gethostname(name,namelen);
 | 
			
		||||
  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
 | 
			
		||||
  assert(nscan==3);
 | 
			
		||||
 | 
			
		||||
  int nlo = N%9;
 | 
			
		||||
  int nhi = N/9;
 | 
			
		||||
  uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
 | 
			
		||||
  uint32_t rootcoor  = hypercoor;
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Print debug info
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
  for(int d=0;d<maxhdim;d++){
 | 
			
		||||
    HyperCubeCoords[d] = (hypercoor>>d)&0x1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::string hname(name);
 | 
			
		||||
  std::cout << "hostname "<<hname<<std::endl;
 | 
			
		||||
  std::cout << "R " << R << " I " << I << " N "<< N
 | 
			
		||||
            << " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
  // broadcast node 0's base coordinate for this partition.
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm); 
 | 
			
		||||
  hypercoor=hypercoor-rootcoor;
 | 
			
		||||
  assert(hypercoor<WorldSize);
 | 
			
		||||
  assert(hypercoor>=0);
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////
 | 
			
		||||
  // Printing
 | 
			
		||||
  //////////////////////////////////////
 | 
			
		||||
  for(int d=0;d<maxhdim;d++){
 | 
			
		||||
    HyperCubeCoords[d] = (hypercoor>>d)&0x1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Identify subblock of ranks on node spreading across dims
 | 
			
		||||
  // in a maximally symmetrical way
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int ndimension              = processors.size();
 | 
			
		||||
  std::vector<int> processor_coor(ndimension);
 | 
			
		||||
  std::vector<int> WorldDims = processors;   std::vector<int> ShmDims  (ndimension,1);  std::vector<int> NodeDims (ndimension);
 | 
			
		||||
  std::vector<int> ShmCoor  (ndimension);    std::vector<int> NodeCoor (ndimension);    std::vector<int> WorldCoor(ndimension);
 | 
			
		||||
  std::vector<int> HyperCoor(ndimension);
 | 
			
		||||
  int dim = 0;
 | 
			
		||||
  for(int l2=0;l2<log2size;l2++){
 | 
			
		||||
    while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
 | 
			
		||||
    ShmDims[dim]*=2;
 | 
			
		||||
    dim=(dim+1)%ndimension;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Establish torus of processes and nodes with sub-blockings
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  for(int d=0;d<ndimension;d++){
 | 
			
		||||
    NodeDims[d] = WorldDims[d]/ShmDims[d];
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Map Hcube according to physical lattice 
 | 
			
		||||
  // must partition. Loop over dims and find out who would join.
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int hcoor = hypercoor;
 | 
			
		||||
  for(int d=0;d<ndimension;d++){
 | 
			
		||||
     int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
 | 
			
		||||
     int msk  = (0x1<<bits)-1;
 | 
			
		||||
     HyperCoor[d]=hcoor & msk;  
 | 
			
		||||
     HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
 | 
			
		||||
     hcoor = hcoor >> bits;
 | 
			
		||||
  } 
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Check processor counts match
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int Nprocessors=1;
 | 
			
		||||
  for(int i=0;i<ndimension;i++){
 | 
			
		||||
    Nprocessors*=processors[i];
 | 
			
		||||
  }
 | 
			
		||||
  assert(WorldSize==Nprocessors);
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Establish mapping between lexico physics coord and WorldRank
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int rank;
 | 
			
		||||
 | 
			
		||||
  Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode   ,NodeDims);
 | 
			
		||||
 | 
			
		||||
  for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
 | 
			
		||||
 | 
			
		||||
  Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
 | 
			
		||||
  for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
 | 
			
		||||
  Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Build the new communicator
 | 
			
		||||
  /////////////////////////////////////////////////////////////////
 | 
			
		||||
  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
#else 
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Assert power of two shm_size.
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
 | 
			
		||||
  assert(log2size != -1);
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Identify subblock of ranks on node spreading across dims
 | 
			
		||||
  // in a maximally symmetrical way
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int ndimension              = processors.size();
 | 
			
		||||
  std::vector<int> processor_coor(ndimension);
 | 
			
		||||
  std::vector<int> WorldDims = processors;   std::vector<int> ShmDims  (ndimension,1);  std::vector<int> NodeDims (ndimension);
 | 
			
		||||
  std::vector<int> ShmCoor  (ndimension);    std::vector<int> NodeCoor (ndimension);    std::vector<int> WorldCoor(ndimension);
 | 
			
		||||
  int dim = 0;
 | 
			
		||||
  for(int l2=0;l2<log2size;l2++){
 | 
			
		||||
    while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
 | 
			
		||||
    ShmDims[dim]*=2;
 | 
			
		||||
    dim=(dim+1)%ndimension;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Establish torus of processes and nodes with sub-blockings
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  for(int d=0;d<ndimension;d++){
 | 
			
		||||
    NodeDims[d] = WorldDims[d]/ShmDims[d];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Check processor counts match
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int Nprocessors=1;
 | 
			
		||||
  for(int i=0;i<ndimension;i++){
 | 
			
		||||
    Nprocessors*=processors[i];
 | 
			
		||||
  }
 | 
			
		||||
  assert(WorldSize==Nprocessors);
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Establish mapping between lexico physics coord and WorldRank
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  int rank;
 | 
			
		||||
 | 
			
		||||
  Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode   ,NodeDims);
 | 
			
		||||
  Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
 | 
			
		||||
  for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
 | 
			
		||||
  Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Build the new communicator
 | 
			
		||||
  /////////////////////////////////////////////////////////////////
 | 
			
		||||
  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// SHMGET
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#ifdef GRID_MPI3_SHMGET
 | 
			
		||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
			
		||||
{
 | 
			
		||||
  std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
 | 
			
		||||
  assert(_ShmSetup==1);
 | 
			
		||||
  assert(_ShmAlloc==0);
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // allocate the shared windows for our group
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
  WorldShmCommBufs.resize(WorldShmSize);
 | 
			
		||||
  std::vector<int> shmids(WorldShmSize);
 | 
			
		||||
 | 
			
		||||
  if ( WorldShmRank == 0 ) {
 | 
			
		||||
    for(int r=0;r<WorldShmSize;r++){
 | 
			
		||||
      size_t size = bytes;
 | 
			
		||||
      key_t key   = IPC_PRIVATE;
 | 
			
		||||
      int flags = IPC_CREAT | SHM_R | SHM_W;
 | 
			
		||||
#ifdef SHM_HUGETLB
 | 
			
		||||
      if (Hugepages) flags|=SHM_HUGETLB;
 | 
			
		||||
#endif
 | 
			
		||||
      if ((shmids[r]= shmget(key,size, flags)) ==-1) {
 | 
			
		||||
        int errsv = errno;
 | 
			
		||||
        printf("Errno %d\n",errsv);
 | 
			
		||||
        printf("key   %d\n",key);
 | 
			
		||||
        printf("size  %lld\n",size);
 | 
			
		||||
        printf("flags %d\n",flags);
 | 
			
		||||
        perror("shmget");
 | 
			
		||||
        exit(1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
  MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
 | 
			
		||||
  for(int r=0;r<WorldShmSize;r++){
 | 
			
		||||
    WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
 | 
			
		||||
    if (WorldShmCommBufs[r] == (uint64_t *)-1) {
 | 
			
		||||
      perror("Shared memory attach failure");
 | 
			
		||||
      shmctl(shmids[r], IPC_RMID, NULL);
 | 
			
		||||
      exit(2);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
  ///////////////////////////////////
 | 
			
		||||
  // Mark for clean up
 | 
			
		||||
  ///////////////////////////////////
 | 
			
		||||
  for(int r=0;r<WorldShmSize;r++){
 | 
			
		||||
    shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
 | 
			
		||||
  }
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
 | 
			
		||||
  _ShmAlloc=1;
 | 
			
		||||
  _ShmAllocBytes  = bytes;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Hugetlbfs mapping intended
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#ifdef GRID_MPI3_SHMMMAP
 | 
			
		||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
			
		||||
{
 | 
			
		||||
  std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
 | 
			
		||||
  assert(_ShmSetup==1);
 | 
			
		||||
  assert(_ShmAlloc==0);
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // allocate the shared windows for our group
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
  WorldShmCommBufs.resize(WorldShmSize);
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Hugetlbfs and others map filesystems as mappable huge pages
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  char shm_name [NAME_MAX];
 | 
			
		||||
  for(int r=0;r<WorldShmSize;r++){
 | 
			
		||||
    
 | 
			
		||||
    sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",WorldNode,r);
 | 
			
		||||
    int fd=open(shm_name,O_RDWR|O_CREAT,0666);
 | 
			
		||||
    if ( fd == -1) { 
 | 
			
		||||
      printf("open %s failed\n",shm_name);
 | 
			
		||||
      perror("open hugetlbfs");
 | 
			
		||||
      exit(0);
 | 
			
		||||
    }
 | 
			
		||||
    int mmap_flag = MAP_SHARED ;
 | 
			
		||||
#ifdef MAP_POPULATE    
 | 
			
		||||
    mmap_flag|=MAP_POPULATE;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef MAP_HUGETLB
 | 
			
		||||
    if ( flags ) mmap_flag |= MAP_HUGETLB;
 | 
			
		||||
#endif
 | 
			
		||||
    void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0); 
 | 
			
		||||
    if ( ptr == (void *)MAP_FAILED ) {    
 | 
			
		||||
      printf("mmap %s failed\n",shm_name);
 | 
			
		||||
      perror("failed mmap");      assert(0);    
 | 
			
		||||
    }
 | 
			
		||||
    assert(((uint64_t)ptr&0x3F)==0);
 | 
			
		||||
    close(fd);
 | 
			
		||||
    WorldShmCommBufs[r] =ptr;
 | 
			
		||||
    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  _ShmAlloc=1;
 | 
			
		||||
  _ShmAllocBytes  = bytes;
 | 
			
		||||
};
 | 
			
		||||
#endif // MMAP
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_MPI3_SHM_NONE
 | 
			
		||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
			
		||||
{
 | 
			
		||||
  std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
 | 
			
		||||
  assert(_ShmSetup==1);
 | 
			
		||||
  assert(_ShmAlloc==0);
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // allocate the shared windows for our group
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
  WorldShmCommBufs.resize(WorldShmSize);
 | 
			
		||||
  
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Hugetlbf and others map filesystems as mappable huge pages
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  char shm_name [NAME_MAX];
 | 
			
		||||
  assert(WorldShmSize == 1);
 | 
			
		||||
  for(int r=0;r<WorldShmSize;r++){
 | 
			
		||||
    
 | 
			
		||||
    int fd=-1;
 | 
			
		||||
    int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
 | 
			
		||||
#ifdef MAP_POPULATE    
 | 
			
		||||
    mmap_flag|=MAP_POPULATE;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef MAP_HUGETLB
 | 
			
		||||
    if ( flags ) mmap_flag |= MAP_HUGETLB;
 | 
			
		||||
#endif
 | 
			
		||||
    void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0); 
 | 
			
		||||
    if ( ptr == (void *)MAP_FAILED ) {    
 | 
			
		||||
      printf("mmap %s failed\n",shm_name);
 | 
			
		||||
      perror("failed mmap");      assert(0);    
 | 
			
		||||
    }
 | 
			
		||||
    assert(((uint64_t)ptr&0x3F)==0);
 | 
			
		||||
    close(fd);
 | 
			
		||||
    WorldShmCommBufs[r] =ptr;
 | 
			
		||||
    std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  _ShmAlloc=1;
 | 
			
		||||
  _ShmAllocBytes  = bytes;
 | 
			
		||||
};
 | 
			
		||||
#endif // MMAP
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_MPI3_SHMOPEN
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
 | 
			
		||||
// tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for 
 | 
			
		||||
// the posix shm virtual file system
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
			
		||||
{ 
 | 
			
		||||
  std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
 | 
			
		||||
  assert(_ShmSetup==1);
 | 
			
		||||
  assert(_ShmAlloc==0); 
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
  WorldShmCommBufs.resize(WorldShmSize);
 | 
			
		||||
 | 
			
		||||
  char shm_name [NAME_MAX];
 | 
			
		||||
  if ( WorldShmRank == 0 ) {
 | 
			
		||||
    for(int r=0;r<WorldShmSize;r++){
 | 
			
		||||
	
 | 
			
		||||
      size_t size = bytes;
 | 
			
		||||
      
 | 
			
		||||
      struct passwd *pw = getpwuid (getuid());
 | 
			
		||||
      sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
 | 
			
		||||
      
 | 
			
		||||
      shm_unlink(shm_name);
 | 
			
		||||
      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
 | 
			
		||||
      if ( fd < 0 ) {	perror("failed shm_open");	assert(0);      }
 | 
			
		||||
      ftruncate(fd, size);
 | 
			
		||||
	
 | 
			
		||||
      int mmap_flag = MAP_SHARED;
 | 
			
		||||
#ifdef MAP_POPULATE 
 | 
			
		||||
      mmap_flag |= MAP_POPULATE;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef MAP_HUGETLB
 | 
			
		||||
      if (flags) mmap_flag |= MAP_HUGETLB;
 | 
			
		||||
#endif
 | 
			
		||||
      void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
 | 
			
		||||
      
 | 
			
		||||
      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
 | 
			
		||||
      if ( ptr == (void * )MAP_FAILED ) {       
 | 
			
		||||
	perror("failed mmap");     
 | 
			
		||||
	assert(0);    
 | 
			
		||||
      }
 | 
			
		||||
      assert(((uint64_t)ptr&0x3F)==0);
 | 
			
		||||
      
 | 
			
		||||
      WorldShmCommBufs[r] =ptr;
 | 
			
		||||
      close(fd);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  MPI_Barrier(WorldShmComm);
 | 
			
		||||
  
 | 
			
		||||
  if ( WorldShmRank != 0 ) { 
 | 
			
		||||
    for(int r=0;r<WorldShmSize;r++){
 | 
			
		||||
 | 
			
		||||
      size_t size = bytes ;
 | 
			
		||||
      
 | 
			
		||||
      struct passwd *pw = getpwuid (getuid());
 | 
			
		||||
      sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
 | 
			
		||||
      
 | 
			
		||||
      int fd=shm_open(shm_name,O_RDWR,0666);
 | 
			
		||||
      if ( fd<0 ) {	perror("failed shm_open");	assert(0);      }
 | 
			
		||||
      
 | 
			
		||||
      void * ptr =  mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 | 
			
		||||
      if ( ptr == MAP_FAILED ) {       perror("failed mmap");      assert(0);    }
 | 
			
		||||
      assert(((uint64_t)ptr&0x3F)==0);
 | 
			
		||||
      WorldShmCommBufs[r] =ptr;
 | 
			
		||||
 | 
			
		||||
      close(fd);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  _ShmAlloc=1;
 | 
			
		||||
  _ShmAllocBytes = bytes;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
  // Global shared functionality finished
 | 
			
		||||
  // Now move to per communicator functionality
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
 | 
			
		||||
{
 | 
			
		||||
  int rank, size;
 | 
			
		||||
  MPI_Comm_rank(comm,&rank);
 | 
			
		||||
  MPI_Comm_size(comm,&size);
 | 
			
		||||
  ShmRanks.resize(size);
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Split into groups that can share memory
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
 | 
			
		||||
  MPI_Comm_rank(ShmComm     ,&ShmRank);
 | 
			
		||||
  MPI_Comm_size(ShmComm     ,&ShmSize);
 | 
			
		||||
  ShmCommBufs.resize(ShmSize);
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Map ShmRank to WorldShmRank and use the right buffer
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////
 | 
			
		||||
  assert (GlobalSharedMemory::ShmAlloc()==1);
 | 
			
		||||
  heap_size = GlobalSharedMemory::ShmAllocBytes();
 | 
			
		||||
  for(int r=0;r<ShmSize;r++){
 | 
			
		||||
 | 
			
		||||
    uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
 | 
			
		||||
 | 
			
		||||
    MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
 | 
			
		||||
 | 
			
		||||
    ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
 | 
			
		||||
    //    std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< "  wsr = "<<wsr<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  ShmBufferFreeAll();
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // find comm ranks in our SHM group (i.e. which ranks are on our node)
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  MPI_Group FullGroup, ShmGroup;
 | 
			
		||||
  MPI_Comm_group (comm   , &FullGroup); 
 | 
			
		||||
  MPI_Comm_group (ShmComm, &ShmGroup);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> ranks(size);   for(int r=0;r<size;r++) ranks[r]=r;
 | 
			
		||||
  MPI_Group_translate_ranks (FullGroup,size,&ranks[0],ShmGroup, &ShmRanks[0]); 
 | 
			
		||||
}
 | 
			
		||||
//////////////////////////////////////////////////////////////////
 | 
			
		||||
// On node barrier
 | 
			
		||||
//////////////////////////////////////////////////////////////////
 | 
			
		||||
void SharedMemory::ShmBarrier(void)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Barrier  (ShmComm);
 | 
			
		||||
}
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Test the shared memory is working
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void SharedMemory::SharedMemoryTest(void)
 | 
			
		||||
{
 | 
			
		||||
  ShmBarrier();
 | 
			
		||||
  if ( ShmRank == 0 ) {
 | 
			
		||||
    for(int r=0;r<ShmSize;r++){
 | 
			
		||||
      uint64_t * check = (uint64_t *) ShmCommBufs[r];
 | 
			
		||||
      check[0] = GlobalSharedMemory::WorldNode;
 | 
			
		||||
      check[1] = r;
 | 
			
		||||
      check[2] = 0x5A5A5A;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  ShmBarrier();
 | 
			
		||||
  for(int r=0;r<ShmSize;r++){
 | 
			
		||||
    uint64_t * check = (uint64_t *) ShmCommBufs[r];
 | 
			
		||||
    
 | 
			
		||||
    assert(check[0]==GlobalSharedMemory::WorldNode);
 | 
			
		||||
    assert(check[1]==r);
 | 
			
		||||
    assert(check[2]==0x5A5A5A);
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
  ShmBarrier();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void *SharedMemory::ShmBuffer(int rank)
 | 
			
		||||
{
 | 
			
		||||
  int gpeer = ShmRanks[rank];
 | 
			
		||||
  if (gpeer == MPI_UNDEFINED){
 | 
			
		||||
    return NULL;
 | 
			
		||||
  } else { 
 | 
			
		||||
    return ShmCommBufs[gpeer];
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
 | 
			
		||||
{
 | 
			
		||||
  static int count =0;
 | 
			
		||||
  int gpeer = ShmRanks[rank];
 | 
			
		||||
  assert(gpeer!=ShmRank); // never send to self
 | 
			
		||||
  if (gpeer == MPI_UNDEFINED){
 | 
			
		||||
    return NULL;
 | 
			
		||||
  } else { 
 | 
			
		||||
    uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
 | 
			
		||||
    uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
 | 
			
		||||
    return (void *) remote;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
SharedMemory::~SharedMemory()
 | 
			
		||||
{
 | 
			
		||||
  int MPI_is_finalised;  MPI_Finalized(&MPI_is_finalised);
 | 
			
		||||
  if ( !MPI_is_finalised ) { 
 | 
			
		||||
    MPI_Comm_free(&ShmComm);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -1,128 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/SharedMemory.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
/*Construct from an MPI communicator*/
 | 
			
		||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
 | 
			
		||||
{
 | 
			
		||||
  assert(_ShmSetup==0);
 | 
			
		||||
  WorldComm = 0;
 | 
			
		||||
  WorldRank = 0;
 | 
			
		||||
  WorldSize = 1;
 | 
			
		||||
  WorldShmComm = 0 ;
 | 
			
		||||
  WorldShmRank = 0 ;
 | 
			
		||||
  WorldShmSize = 1 ;
 | 
			
		||||
  WorldNodes   = 1 ;
 | 
			
		||||
  WorldNode    = 0 ;
 | 
			
		||||
  WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
 | 
			
		||||
  WorldShmCommBufs.resize(1);
 | 
			
		||||
  _ShmSetup=1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
 | 
			
		||||
{
 | 
			
		||||
  optimal_comm = WorldComm;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Hugetlbfs mapping intended, use anonymous mmap
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
			
		||||
{
 | 
			
		||||
  void * ShmCommBuf ; 
 | 
			
		||||
  assert(_ShmSetup==1);
 | 
			
		||||
  assert(_ShmAlloc==0);
 | 
			
		||||
  int mmap_flag =0;
 | 
			
		||||
#ifdef MAP_ANONYMOUS
 | 
			
		||||
  mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef MAP_ANON
 | 
			
		||||
  mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef MAP_HUGETLB
 | 
			
		||||
  if ( flags ) mmap_flag |= MAP_HUGETLB;
 | 
			
		||||
#endif
 | 
			
		||||
  ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); 
 | 
			
		||||
  if (ShmCommBuf == (void *)MAP_FAILED) {
 | 
			
		||||
    perror("mmap failed ");
 | 
			
		||||
    exit(EXIT_FAILURE);  
 | 
			
		||||
  }
 | 
			
		||||
#ifdef MADV_HUGEPAGE
 | 
			
		||||
  if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE);
 | 
			
		||||
#endif
 | 
			
		||||
  bzero(ShmCommBuf,bytes);
 | 
			
		||||
  WorldShmCommBufs[0] = ShmCommBuf;
 | 
			
		||||
  _ShmAllocBytes=bytes;
 | 
			
		||||
  _ShmAlloc=1;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
  // Global shared functionality finished
 | 
			
		||||
  // Now move to per communicator functionality
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
 | 
			
		||||
{
 | 
			
		||||
  assert(GlobalSharedMemory::ShmAlloc()==1);
 | 
			
		||||
  ShmRanks.resize(1);
 | 
			
		||||
  ShmCommBufs.resize(1);
 | 
			
		||||
  ShmRanks[0] = 0;
 | 
			
		||||
  ShmRank     = 0;
 | 
			
		||||
  ShmSize     = 1;
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Map ShmRank to WorldShmRank and use the right buffer
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////
 | 
			
		||||
  ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
 | 
			
		||||
  heap_size      = GlobalSharedMemory::ShmAllocBytes();
 | 
			
		||||
  ShmBufferFreeAll();
 | 
			
		||||
  return;
 | 
			
		||||
}
 | 
			
		||||
//////////////////////////////////////////////////////////////////
 | 
			
		||||
// On node barrier
 | 
			
		||||
//////////////////////////////////////////////////////////////////
 | 
			
		||||
void SharedMemory::ShmBarrier(void){ return ; }
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Test the shared memory is working
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void SharedMemory::SharedMemoryTest(void) { return; }
 | 
			
		||||
 | 
			
		||||
void *SharedMemory::ShmBuffer(int rank)
 | 
			
		||||
{
 | 
			
		||||
  return NULL;
 | 
			
		||||
}
 | 
			
		||||
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
 | 
			
		||||
{
 | 
			
		||||
  return NULL;
 | 
			
		||||
}
 | 
			
		||||
SharedMemory::~SharedMemory()
 | 
			
		||||
{};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										18920
									
								
								Grid/json/json.hpp
									
									
									
									
									
								
							
							
						
						
									
										18920
									
								
								Grid/json/json.hpp
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -1,729 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/parallelIO/BinaryIO.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    Author: Guido Cossu<guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_BINARY_IO_H
 | 
			
		||||
#define GRID_BINARY_IO_H
 | 
			
		||||
 | 
			
		||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT) 
 | 
			
		||||
#define USE_MPI_IO
 | 
			
		||||
#else
 | 
			
		||||
#undef  USE_MPI_IO
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_ENDIAN_H
 | 
			
		||||
#include <endian.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <arpa/inet.h>
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Byte reversal garbage
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
inline uint32_t byte_reverse32(uint32_t f) { 
 | 
			
		||||
      f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
      return f;
 | 
			
		||||
}
 | 
			
		||||
inline uint64_t byte_reverse64(uint64_t f) { 
 | 
			
		||||
  uint64_t g;
 | 
			
		||||
  g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
  g = g << 32;
 | 
			
		||||
  f = f >> 32;
 | 
			
		||||
  g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
  return g;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if BYTE_ORDER == BIG_ENDIAN 
 | 
			
		||||
inline uint64_t Grid_ntohll(uint64_t A) { return A; }
 | 
			
		||||
#else
 | 
			
		||||
inline uint64_t Grid_ntohll(uint64_t A) { 
 | 
			
		||||
  return byte_reverse64(A);
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// A little helper
 | 
			
		||||
inline void removeWhitespace(std::string &key)
 | 
			
		||||
{
 | 
			
		||||
  key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Static class holding the parallel IO code
 | 
			
		||||
// Could just use a namespace
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
class BinaryIO {
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // more byte manipulation helpers
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  template<class vobj> static inline void Uint32Checksum(Lattice<vobj> &lat,uint32_t &nersc_csum)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = lat._grid;
 | 
			
		||||
    uint64_t lsites = grid->lSites();
 | 
			
		||||
 | 
			
		||||
    std::vector<sobj> scalardata(lsites); 
 | 
			
		||||
    unvectorizeToLexOrdArray(scalardata,lat);    
 | 
			
		||||
 | 
			
		||||
    NerscChecksum(grid,scalardata,nersc_csum);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <class fobj>
 | 
			
		||||
  static inline void NerscChecksum(GridBase *grid, std::vector<fobj> &fbuf, uint32_t &nersc_csum)
 | 
			
		||||
  {
 | 
			
		||||
    const uint64_t size32 = sizeof(fobj) / sizeof(uint32_t);
 | 
			
		||||
 | 
			
		||||
    uint64_t lsites = grid->lSites();
 | 
			
		||||
    if (fbuf.size() == 1)
 | 
			
		||||
    {
 | 
			
		||||
      lsites = 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
PARALLEL_REGION
 | 
			
		||||
    {
 | 
			
		||||
      uint32_t nersc_csum_thr = 0;
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP_INTERN
 | 
			
		||||
      for (uint64_t local_site = 0; local_site < lsites; local_site++)
 | 
			
		||||
      {
 | 
			
		||||
        uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
 | 
			
		||||
        for (uint64_t j = 0; j < size32; j++)
 | 
			
		||||
        {
 | 
			
		||||
          nersc_csum_thr = nersc_csum_thr + site_buf[j];
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
PARALLEL_CRITICAL
 | 
			
		||||
      {
 | 
			
		||||
        nersc_csum += nersc_csum_thr;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class fobj> static inline void ScidacChecksum(GridBase *grid,std::vector<fobj> &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb)
 | 
			
		||||
  {
 | 
			
		||||
    const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    int nd = grid->_ndimension;
 | 
			
		||||
 | 
			
		||||
    uint64_t lsites              =grid->lSites();
 | 
			
		||||
    if (fbuf.size()==1) {
 | 
			
		||||
      lsites=1;
 | 
			
		||||
    }
 | 
			
		||||
    std::vector<int> local_vol   =grid->LocalDimensions();
 | 
			
		||||
    std::vector<int> local_start =grid->LocalStarts();
 | 
			
		||||
    std::vector<int> global_vol  =grid->FullDimensions();
 | 
			
		||||
 | 
			
		||||
PARALLEL_REGION
 | 
			
		||||
    { 
 | 
			
		||||
      std::vector<int> coor(nd);
 | 
			
		||||
      uint32_t scidac_csuma_thr=0;
 | 
			
		||||
      uint32_t scidac_csumb_thr=0;
 | 
			
		||||
      uint32_t site_crc=0;
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP_INTERN
 | 
			
		||||
      for(uint64_t local_site=0;local_site<lsites;local_site++){
 | 
			
		||||
 | 
			
		||||
	uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
 | 
			
		||||
 | 
			
		||||
	/* 
 | 
			
		||||
	 * Scidac csum  is rather more heavyweight
 | 
			
		||||
	 * FIXME -- 128^3 x 256 x 16 will overflow.
 | 
			
		||||
	 */
 | 
			
		||||
	
 | 
			
		||||
	int global_site;
 | 
			
		||||
 | 
			
		||||
	Lexicographic::CoorFromIndex(coor,local_site,local_vol);
 | 
			
		||||
 | 
			
		||||
	for(int d=0;d<nd;d++) {
 | 
			
		||||
	  coor[d] = coor[d]+local_start[d];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	Lexicographic::IndexFromCoor(coor,global_site,global_vol);
 | 
			
		||||
 | 
			
		||||
	uint32_t gsite29   = global_site%29;
 | 
			
		||||
	uint32_t gsite31   = global_site%31;
 | 
			
		||||
	
 | 
			
		||||
	site_crc = crc32(0,(unsigned char *)site_buf,sizeof(fobj));
 | 
			
		||||
	//	std::cout << "Site "<<local_site << " crc "<<std::hex<<site_crc<<std::dec<<std::endl;
 | 
			
		||||
	//	std::cout << "Site "<<local_site << std::hex<<site_buf[0] <<site_buf[1]<<std::dec <<std::endl;
 | 
			
		||||
	scidac_csuma_thr ^= site_crc<<gsite29 | site_crc>>(32-gsite29);
 | 
			
		||||
	scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
PARALLEL_CRITICAL
 | 
			
		||||
      {
 | 
			
		||||
	scidac_csuma^= scidac_csuma_thr;
 | 
			
		||||
	scidac_csumb^= scidac_csumb_thr;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Network is big endian
 | 
			
		||||
  static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);} 
 | 
			
		||||
  static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);} 
 | 
			
		||||
  static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);} 
 | 
			
		||||
  static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);} 
 | 
			
		||||
 | 
			
		||||
  static inline void be32toh_v(void *file_object,uint64_t bytes)
 | 
			
		||||
  {
 | 
			
		||||
    uint32_t * f = (uint32_t *)file_object;
 | 
			
		||||
    uint64_t count = bytes/sizeof(uint32_t);
 | 
			
		||||
    parallel_for(uint64_t i=0;i<count;i++){  
 | 
			
		||||
      f[i] = ntohl(f[i]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // LE must Swap and switch to host
 | 
			
		||||
  static inline void le32toh_v(void *file_object,uint64_t bytes)
 | 
			
		||||
  {
 | 
			
		||||
    uint32_t *fp = (uint32_t *)file_object;
 | 
			
		||||
    uint32_t f;
 | 
			
		||||
 | 
			
		||||
    uint64_t count = bytes/sizeof(uint32_t);
 | 
			
		||||
    parallel_for(uint64_t i=0;i<count;i++){  
 | 
			
		||||
      f = fp[i];
 | 
			
		||||
      // got network order and the network to host
 | 
			
		||||
      f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
      fp[i] = ntohl(f);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // BE is same as network
 | 
			
		||||
  static inline void be64toh_v(void *file_object,uint64_t bytes)
 | 
			
		||||
  {
 | 
			
		||||
    uint64_t * f = (uint64_t *)file_object;
 | 
			
		||||
    uint64_t count = bytes/sizeof(uint64_t);
 | 
			
		||||
    parallel_for(uint64_t i=0;i<count;i++){  
 | 
			
		||||
      f[i] = Grid_ntohll(f[i]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // LE must swap and switch;
 | 
			
		||||
  static inline void le64toh_v(void *file_object,uint64_t bytes)
 | 
			
		||||
  {
 | 
			
		||||
    uint64_t *fp = (uint64_t *)file_object;
 | 
			
		||||
    uint64_t f,g;
 | 
			
		||||
    
 | 
			
		||||
    uint64_t count = bytes/sizeof(uint64_t);
 | 
			
		||||
    parallel_for(uint64_t i=0;i<count;i++){  
 | 
			
		||||
      f = fp[i];
 | 
			
		||||
      // got network order and the network to host
 | 
			
		||||
      g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
      g = g << 32;
 | 
			
		||||
      f = f >> 32;
 | 
			
		||||
      g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; 
 | 
			
		||||
      fp[i] = Grid_ntohll(g);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Real action:
 | 
			
		||||
  // Read or Write distributed lexico array of ANY object to a specific location in file 
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  static const int BINARYIO_MASTER_APPEND = 0x10;
 | 
			
		||||
  static const int BINARYIO_UNORDERED     = 0x08;
 | 
			
		||||
  static const int BINARYIO_LEXICOGRAPHIC = 0x04;
 | 
			
		||||
  static const int BINARYIO_READ          = 0x02;
 | 
			
		||||
  static const int BINARYIO_WRITE         = 0x01;
 | 
			
		||||
 | 
			
		||||
  template<class word,class fobj>
 | 
			
		||||
  static inline void IOobject(word w,
 | 
			
		||||
			      GridBase *grid,
 | 
			
		||||
			      std::vector<fobj> &iodata,
 | 
			
		||||
			      std::string file,
 | 
			
		||||
			      uint64_t& offset,
 | 
			
		||||
			      const std::string &format, int control,
 | 
			
		||||
			      uint32_t &nersc_csum,
 | 
			
		||||
			      uint32_t &scidac_csuma,
 | 
			
		||||
			      uint32_t &scidac_csumb)
 | 
			
		||||
  {
 | 
			
		||||
    grid->Barrier();
 | 
			
		||||
    GridStopWatch timer; 
 | 
			
		||||
    GridStopWatch bstimer;
 | 
			
		||||
    
 | 
			
		||||
    nersc_csum=0;
 | 
			
		||||
    scidac_csuma=0;
 | 
			
		||||
    scidac_csumb=0;
 | 
			
		||||
 | 
			
		||||
    int ndim                 = grid->Dimensions();
 | 
			
		||||
    int nrank                = grid->ProcessorCount();
 | 
			
		||||
    int myrank               = grid->ThisRank();
 | 
			
		||||
 | 
			
		||||
    std::vector<int>  psizes = grid->ProcessorGrid(); 
 | 
			
		||||
    std::vector<int>  pcoor  = grid->ThisProcessorCoor();
 | 
			
		||||
    std::vector<int> gLattice= grid->GlobalDimensions();
 | 
			
		||||
    std::vector<int> lLattice= grid->LocalDimensions();
 | 
			
		||||
 | 
			
		||||
    std::vector<int> lStart(ndim);
 | 
			
		||||
    std::vector<int> gStart(ndim);
 | 
			
		||||
 | 
			
		||||
    // Flatten the file
 | 
			
		||||
    uint64_t lsites = grid->lSites();
 | 
			
		||||
    if ( control & BINARYIO_MASTER_APPEND )  {
 | 
			
		||||
      assert(iodata.size()==1);
 | 
			
		||||
    } else {
 | 
			
		||||
      assert(lsites==iodata.size());
 | 
			
		||||
    }
 | 
			
		||||
    for(int d=0;d<ndim;d++){
 | 
			
		||||
      gStart[d] = lLattice[d]*pcoor[d];
 | 
			
		||||
      lStart[d] = 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#ifdef USE_MPI_IO
 | 
			
		||||
    std::vector<int> distribs(ndim,MPI_DISTRIBUTE_BLOCK);
 | 
			
		||||
    std::vector<int> dargs   (ndim,MPI_DISTRIBUTE_DFLT_DARG);
 | 
			
		||||
    MPI_Datatype mpiObject;
 | 
			
		||||
    MPI_Datatype fileArray;
 | 
			
		||||
    MPI_Datatype localArray;
 | 
			
		||||
    MPI_Datatype mpiword;
 | 
			
		||||
    MPI_Offset disp = offset;
 | 
			
		||||
    MPI_File fh ;
 | 
			
		||||
    MPI_Status status;
 | 
			
		||||
    int numword;
 | 
			
		||||
 | 
			
		||||
    if ( sizeof( word ) == sizeof(float ) ) {
 | 
			
		||||
      numword = sizeof(fobj)/sizeof(float);
 | 
			
		||||
      mpiword = MPI_FLOAT;
 | 
			
		||||
    } else {
 | 
			
		||||
      numword = sizeof(fobj)/sizeof(double);
 | 
			
		||||
      mpiword = MPI_DOUBLE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Sobj in MPI phrasing
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    int ierr;
 | 
			
		||||
    ierr = MPI_Type_contiguous(numword,mpiword,&mpiObject);    assert(ierr==0);
 | 
			
		||||
    ierr = MPI_Type_commit(&mpiObject);
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // File global array data type
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    ierr=MPI_Type_create_subarray(ndim,&gLattice[0],&lLattice[0],&gStart[0],MPI_ORDER_FORTRAN, mpiObject,&fileArray);    assert(ierr==0);
 | 
			
		||||
    ierr=MPI_Type_commit(&fileArray);    assert(ierr==0);
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // local lattice array
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    ierr=MPI_Type_create_subarray(ndim,&lLattice[0],&lLattice[0],&lStart[0],MPI_ORDER_FORTRAN, mpiObject,&localArray);    assert(ierr==0);
 | 
			
		||||
    ierr=MPI_Type_commit(&localArray);    assert(ierr==0);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Byte order
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    int ieee32big = (format == std::string("IEEE32BIG"));
 | 
			
		||||
    int ieee32    = (format == std::string("IEEE32"));
 | 
			
		||||
    int ieee64big = (format == std::string("IEEE64BIG"));
 | 
			
		||||
    int ieee64    = (format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Do the I/O
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    if ( control & BINARYIO_READ ) { 
 | 
			
		||||
 | 
			
		||||
      timer.Start();
 | 
			
		||||
 | 
			
		||||
      if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
 | 
			
		||||
#ifdef USE_MPI_IO
 | 
			
		||||
	std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl;
 | 
			
		||||
	ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);    assert(ierr==0);
 | 
			
		||||
	ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);    assert(ierr==0);
 | 
			
		||||
	ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status);    assert(ierr==0);
 | 
			
		||||
	MPI_File_close(&fh);
 | 
			
		||||
	MPI_Type_free(&fileArray);
 | 
			
		||||
	MPI_Type_free(&localArray);
 | 
			
		||||
#else 
 | 
			
		||||
	assert(0);
 | 
			
		||||
#endif
 | 
			
		||||
      } else {
 | 
			
		||||
	std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
 | 
			
		||||
                  << iodata.size() * sizeof(fobj) << " bytes" << std::endl;
 | 
			
		||||
        std::ifstream fin;
 | 
			
		||||
	fin.open(file, std::ios::binary | std::ios::in);
 | 
			
		||||
        if (control & BINARYIO_MASTER_APPEND)
 | 
			
		||||
        {
 | 
			
		||||
          fin.seekg(-sizeof(fobj), fin.end);
 | 
			
		||||
        }
 | 
			
		||||
        else
 | 
			
		||||
        {
 | 
			
		||||
          fin.seekg(offset + myrank * lsites * sizeof(fobj));
 | 
			
		||||
        }
 | 
			
		||||
        fin.read((char *)&iodata[0], iodata.size() * sizeof(fobj));
 | 
			
		||||
        assert(fin.fail() == 0);
 | 
			
		||||
        fin.close();
 | 
			
		||||
      }
 | 
			
		||||
      timer.Stop();
 | 
			
		||||
 | 
			
		||||
      grid->Barrier();
 | 
			
		||||
 | 
			
		||||
      bstimer.Start();
 | 
			
		||||
      ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
 | 
			
		||||
      if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
 | 
			
		||||
      if (ieee32)    le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
 | 
			
		||||
      if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
 | 
			
		||||
      if (ieee64)    le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
 | 
			
		||||
      NerscChecksum(grid,iodata,nersc_csum);
 | 
			
		||||
      bstimer.Stop();
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    if ( control & BINARYIO_WRITE ) { 
 | 
			
		||||
 | 
			
		||||
      bstimer.Start();
 | 
			
		||||
      NerscChecksum(grid,iodata,nersc_csum);
 | 
			
		||||
      if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
 | 
			
		||||
      if (ieee32)    htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
 | 
			
		||||
      if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
 | 
			
		||||
      if (ieee64)    htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
 | 
			
		||||
      ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
 | 
			
		||||
      bstimer.Stop();
 | 
			
		||||
 | 
			
		||||
      grid->Barrier();
 | 
			
		||||
 | 
			
		||||
      timer.Start();
 | 
			
		||||
      if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
 | 
			
		||||
#ifdef USE_MPI_IO
 | 
			
		||||
        std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl;
 | 
			
		||||
        ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
 | 
			
		||||
	//        std::cout << GridLogMessage << "Checking for errors" << std::endl;
 | 
			
		||||
        if (ierr != MPI_SUCCESS)
 | 
			
		||||
        {
 | 
			
		||||
          char error_string[BUFSIZ];
 | 
			
		||||
          int length_of_error_string, error_class;
 | 
			
		||||
 | 
			
		||||
          MPI_Error_class(ierr, &error_class);
 | 
			
		||||
          MPI_Error_string(error_class, error_string, &length_of_error_string);
 | 
			
		||||
          fprintf(stderr, "%3d: %s\n", myrank, error_string);
 | 
			
		||||
          MPI_Error_string(ierr, error_string, &length_of_error_string);
 | 
			
		||||
          fprintf(stderr, "%3d: %s\n", myrank, error_string);
 | 
			
		||||
          MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
 | 
			
		||||
        ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
 | 
			
		||||
        assert(ierr == 0);
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
 | 
			
		||||
        ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
 | 
			
		||||
        assert(ierr == 0);
 | 
			
		||||
 | 
			
		||||
        MPI_Offset os;
 | 
			
		||||
        MPI_File_get_position(fh, &os);
 | 
			
		||||
        MPI_File_get_byte_offset(fh, os, &disp);
 | 
			
		||||
        offset = disp;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        MPI_File_close(&fh);
 | 
			
		||||
        MPI_Type_free(&fileArray);
 | 
			
		||||
        MPI_Type_free(&localArray);
 | 
			
		||||
#else 
 | 
			
		||||
	assert(0);
 | 
			
		||||
#endif
 | 
			
		||||
      } else { 
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
 | 
			
		||||
                  << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
 | 
			
		||||
        
 | 
			
		||||
	std::ofstream fout; 
 | 
			
		||||
	fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
 | 
			
		||||
	try {
 | 
			
		||||
	  if (offset) { // Must already exist and contain data
 | 
			
		||||
	    fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
 | 
			
		||||
	  } else {     // Allow create
 | 
			
		||||
	    fout.open(file,std::ios::binary|std::ios::out);
 | 
			
		||||
	  }
 | 
			
		||||
	} catch (const std::fstream::failure& exc) {
 | 
			
		||||
	  std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
 | 
			
		||||
	  std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
 | 
			
		||||
	  //	  std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
 | 
			
		||||
#ifdef USE_MPI_IO
 | 
			
		||||
	  MPI_Abort(MPI_COMM_WORLD,1);
 | 
			
		||||
#else
 | 
			
		||||
	  exit(1);
 | 
			
		||||
#endif
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	if ( control & BINARYIO_MASTER_APPEND )  {
 | 
			
		||||
	  try {
 | 
			
		||||
	    fout.seekp(0,fout.end);
 | 
			
		||||
	  } catch (const std::fstream::failure& exc) {
 | 
			
		||||
	    std::cout << "Exception in seeking file end " << file << std::endl;
 | 
			
		||||
	  }
 | 
			
		||||
	} else {
 | 
			
		||||
	  try { 
 | 
			
		||||
	    fout.seekp(offset+myrank*lsites*sizeof(fobj));
 | 
			
		||||
	  } catch (const std::fstream::failure& exc) {
 | 
			
		||||
	    std::cout << "Exception in seeking file " << file <<" offset "<< offset << std::endl;
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	try {
 | 
			
		||||
	  fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0);
 | 
			
		||||
	}
 | 
			
		||||
	catch (const std::fstream::failure& exc) {
 | 
			
		||||
	  std::cout << "Exception in writing file " << file << std::endl;
 | 
			
		||||
	  std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl;
 | 
			
		||||
#ifdef USE_MPI_IO
 | 
			
		||||
	  MPI_Abort(MPI_COMM_WORLD,1);
 | 
			
		||||
#else
 | 
			
		||||
	  exit(1);
 | 
			
		||||
#endif
 | 
			
		||||
	}
 | 
			
		||||
  offset  = fout.tellp();
 | 
			
		||||
	fout.close();
 | 
			
		||||
      }
 | 
			
		||||
      timer.Stop();
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    std::cout<<GridLogMessage<<"IOobject: ";
 | 
			
		||||
    if ( control & BINARYIO_READ) std::cout << " read  ";
 | 
			
		||||
    else                          std::cout << " write ";
 | 
			
		||||
    uint64_t bytes = sizeof(fobj)*iodata.size()*nrank;
 | 
			
		||||
    std::cout<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/ (double)timer.useconds() <<" MB/s "<<std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed()  <<std::endl;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Safety check
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // if the data size is 1 we do not want to sum over the MPI ranks
 | 
			
		||||
    if (iodata.size() != 1){
 | 
			
		||||
      grid->Barrier();
 | 
			
		||||
      grid->GlobalSum(nersc_csum);
 | 
			
		||||
      grid->GlobalXOR(scidac_csuma);
 | 
			
		||||
      grid->GlobalXOR(scidac_csumb);
 | 
			
		||||
      grid->Barrier();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Read a Lattice of object
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj,class fobj,class munger>
 | 
			
		||||
  static inline void readLatticeObject(Lattice<vobj> &Umu,
 | 
			
		||||
				       std::string file,
 | 
			
		||||
				       munger munge,
 | 
			
		||||
				       uint64_t offset,
 | 
			
		||||
				       const std::string &format,
 | 
			
		||||
				       uint32_t &nersc_csum,
 | 
			
		||||
				       uint32_t &scidac_csuma,
 | 
			
		||||
				       uint32_t &scidac_csumb)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
    typedef typename vobj::Realified::scalar_type word;    word w=0;
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
    uint64_t lsites = grid->lSites();
 | 
			
		||||
 | 
			
		||||
    std::vector<sobj> scalardata(lsites); 
 | 
			
		||||
    std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here
 | 
			
		||||
    
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    GridStopWatch timer; 
 | 
			
		||||
    timer.Start();
 | 
			
		||||
 | 
			
		||||
    parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
 | 
			
		||||
 | 
			
		||||
    vectorizeFromLexOrdArray(scalardata,Umu);    
 | 
			
		||||
    grid->Barrier();
 | 
			
		||||
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogMessage<<"readLatticeObject: vectorize overhead "<<timer.Elapsed()  <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Write a Lattice of object
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj,class fobj,class munger>
 | 
			
		||||
    static inline void writeLatticeObject(Lattice<vobj> &Umu,
 | 
			
		||||
					  std::string file,
 | 
			
		||||
					  munger munge,
 | 
			
		||||
					  uint64_t offset,
 | 
			
		||||
					  const std::string &format,
 | 
			
		||||
					  uint32_t &nersc_csum,
 | 
			
		||||
					  uint32_t &scidac_csuma,
 | 
			
		||||
					  uint32_t &scidac_csumb)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
    typedef typename vobj::Realified::scalar_type word;    word w=0;
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
    uint64_t lsites = grid->lSites();
 | 
			
		||||
 | 
			
		||||
    std::vector<sobj> scalardata(lsites); 
 | 
			
		||||
    std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Munge [ .e.g 3rd row recon ]
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    GridStopWatch timer; timer.Start();
 | 
			
		||||
    unvectorizeToLexOrdArray(scalardata,Umu);    
 | 
			
		||||
 | 
			
		||||
    parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
 | 
			
		||||
 | 
			
		||||
    grid->Barrier();
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed()  <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Read a RNG;  use IOobject and lexico map to an array of state 
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  static inline void readRNG(GridSerialRNG &serial,
 | 
			
		||||
			     GridParallelRNG ¶llel,
 | 
			
		||||
			     std::string file,
 | 
			
		||||
			     uint64_t offset,
 | 
			
		||||
			     uint32_t &nersc_csum,
 | 
			
		||||
			     uint32_t &scidac_csuma,
 | 
			
		||||
			     uint32_t &scidac_csumb)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename GridSerialRNG::RngStateType RngStateType;
 | 
			
		||||
    const int RngStateCount = GridSerialRNG::RngStateCount;
 | 
			
		||||
    typedef std::array<RngStateType,RngStateCount> RNGstate;
 | 
			
		||||
    typedef RngStateType word;    word w=0;
 | 
			
		||||
 | 
			
		||||
    std::string format = "IEEE32BIG";
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = parallel._grid;
 | 
			
		||||
    uint64_t gsites = grid->gSites();
 | 
			
		||||
    uint64_t lsites = grid->lSites();
 | 
			
		||||
 | 
			
		||||
    uint32_t nersc_csum_tmp   = 0;
 | 
			
		||||
    uint32_t scidac_csuma_tmp = 0;
 | 
			
		||||
    uint32_t scidac_csumb_tmp = 0;
 | 
			
		||||
 | 
			
		||||
    GridStopWatch timer;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "RNG read I/O on file " << file << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::vector<RNGstate> iodata(lsites);
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    timer.Start();
 | 
			
		||||
    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
 | 
			
		||||
      std::vector<RngStateType> tmp(RngStateCount);
 | 
			
		||||
      std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
 | 
			
		||||
      parallel.SetState(tmp,lidx);
 | 
			
		||||
    }
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
 | 
			
		||||
    iodata.resize(1);
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_MASTER_APPEND,
 | 
			
		||||
	     nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
 | 
			
		||||
 | 
			
		||||
    {
 | 
			
		||||
      std::vector<RngStateType> tmp(RngStateCount);
 | 
			
		||||
      std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin());
 | 
			
		||||
      serial.SetState(tmp,0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    nersc_csum   = nersc_csum   + nersc_csum_tmp;
 | 
			
		||||
    scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
 | 
			
		||||
    scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file nersc_checksum   " << std::hex << nersc_csum << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Write a RNG; lexico map to an array of state and use IOobject
 | 
			
		||||
  //////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  static inline void writeRNG(GridSerialRNG &serial,
 | 
			
		||||
			      GridParallelRNG ¶llel,
 | 
			
		||||
			      std::string file,
 | 
			
		||||
			      uint64_t offset,
 | 
			
		||||
			      uint32_t &nersc_csum,
 | 
			
		||||
			      uint32_t &scidac_csuma,
 | 
			
		||||
			      uint32_t &scidac_csumb)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename GridSerialRNG::RngStateType RngStateType;
 | 
			
		||||
    typedef RngStateType word; word w=0;
 | 
			
		||||
    const int RngStateCount = GridSerialRNG::RngStateCount;
 | 
			
		||||
    typedef std::array<RngStateType,RngStateCount> RNGstate;
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = parallel._grid;
 | 
			
		||||
    uint64_t gsites = grid->gSites();
 | 
			
		||||
    uint64_t lsites = grid->lSites();
 | 
			
		||||
 | 
			
		||||
    uint32_t nersc_csum_tmp;
 | 
			
		||||
    uint32_t scidac_csuma_tmp;
 | 
			
		||||
    uint32_t scidac_csumb_tmp;
 | 
			
		||||
 | 
			
		||||
    GridStopWatch timer;
 | 
			
		||||
    std::string format = "IEEE32BIG";
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "RNG write I/O on file " << file << std::endl;
 | 
			
		||||
 | 
			
		||||
    timer.Start();
 | 
			
		||||
    std::vector<RNGstate> iodata(lsites);
 | 
			
		||||
    parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
 | 
			
		||||
      std::vector<RngStateType> tmp(RngStateCount);
 | 
			
		||||
      parallel.GetState(tmp,lidx);
 | 
			
		||||
      std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
 | 
			
		||||
    }
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
    iodata.resize(1);
 | 
			
		||||
    {
 | 
			
		||||
      std::vector<RngStateType> tmp(RngStateCount);
 | 
			
		||||
      serial.GetState(tmp,0);
 | 
			
		||||
      std::copy(tmp.begin(),tmp.end(),iodata[0].begin());
 | 
			
		||||
    }
 | 
			
		||||
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND,
 | 
			
		||||
	     nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
 | 
			
		||||
 | 
			
		||||
    nersc_csum   = nersc_csum   + nersc_csum_tmp;
 | 
			
		||||
    scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
 | 
			
		||||
    scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
 | 
			
		||||
    
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksum " << std::hex << nersc_csum    << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG file checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,875 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/parallelIO/IldgIO.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_ILDG_IO_H
 | 
			
		||||
#define GRID_ILDG_IO_H
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_LIME
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <iomanip>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <map>
 | 
			
		||||
 | 
			
		||||
#include <pwd.h>
 | 
			
		||||
#include <sys/utsname.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
 | 
			
		||||
//C-Lime is a must have for this functionality
 | 
			
		||||
extern "C" {  
 | 
			
		||||
#include "lime.h"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////
 | 
			
		||||
  // Encode word types as strings
 | 
			
		||||
  /////////////////////////////////
 | 
			
		||||
 template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); }
 | 
			
		||||
 template<> inline std::string ScidacWordMnemonic<double>  (void){ return std::string("D"); }
 | 
			
		||||
 template<> inline std::string ScidacWordMnemonic<float>   (void){ return std::string("F"); }
 | 
			
		||||
 template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); }
 | 
			
		||||
 template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); }
 | 
			
		||||
 template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); }
 | 
			
		||||
 template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); }
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////
 | 
			
		||||
  // Encode a generic tensor as a string
 | 
			
		||||
  /////////////////////////////////////////
 | 
			
		||||
 template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) { 
 | 
			
		||||
 | 
			
		||||
   typedef typename getPrecision<vobj>::real_scalar_type stype;
 | 
			
		||||
 | 
			
		||||
   int _ColourN       = indexRank<ColourIndex,vobj>();
 | 
			
		||||
   int _ColourScalar  =  isScalar<ColourIndex,vobj>();
 | 
			
		||||
   int _ColourVector  =  isVector<ColourIndex,vobj>();
 | 
			
		||||
   int _ColourMatrix  =  isMatrix<ColourIndex,vobj>();
 | 
			
		||||
 | 
			
		||||
   int _SpinN       = indexRank<SpinIndex,vobj>();
 | 
			
		||||
   int _SpinScalar  =  isScalar<SpinIndex,vobj>();
 | 
			
		||||
   int _SpinVector  =  isVector<SpinIndex,vobj>();
 | 
			
		||||
   int _SpinMatrix  =  isMatrix<SpinIndex,vobj>();
 | 
			
		||||
 | 
			
		||||
   int _LorentzN       = indexRank<LorentzIndex,vobj>();
 | 
			
		||||
   int _LorentzScalar  =  isScalar<LorentzIndex,vobj>();
 | 
			
		||||
   int _LorentzVector  =  isVector<LorentzIndex,vobj>();
 | 
			
		||||
   int _LorentzMatrix  =  isMatrix<LorentzIndex,vobj>();
 | 
			
		||||
 | 
			
		||||
   std::stringstream stream;
 | 
			
		||||
 | 
			
		||||
   stream << "GRID_";
 | 
			
		||||
   stream << ScidacWordMnemonic<stype>();
 | 
			
		||||
 | 
			
		||||
   if ( _LorentzVector )   stream << "_LorentzVector"<<_LorentzN;
 | 
			
		||||
   if ( _LorentzMatrix )   stream << "_LorentzMatrix"<<_LorentzN;
 | 
			
		||||
 | 
			
		||||
   if ( _SpinVector )   stream << "_SpinVector"<<_SpinN;
 | 
			
		||||
   if ( _SpinMatrix )   stream << "_SpinMatrix"<<_SpinN;
 | 
			
		||||
 | 
			
		||||
   if ( _ColourVector )   stream << "_ColourVector"<<_ColourN;
 | 
			
		||||
   if ( _ColourMatrix )   stream << "_ColourMatrix"<<_ColourN;
 | 
			
		||||
 | 
			
		||||
   if ( _ColourScalar && _LorentzScalar && _SpinScalar )   stream << "_Complex";
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   typesize = sizeof(typename vobj::scalar_type);
 | 
			
		||||
 | 
			
		||||
   if ( _ColourMatrix ) typesize*= _ColourN*_ColourN;
 | 
			
		||||
   else                 typesize*= _ColourN;
 | 
			
		||||
 | 
			
		||||
   if ( _SpinMatrix )   typesize*= _SpinN*_SpinN;
 | 
			
		||||
   else                 typesize*= _SpinN;
 | 
			
		||||
 | 
			
		||||
   colors    = _ColourN;
 | 
			
		||||
   spins     = _SpinN;
 | 
			
		||||
   datacount = _LorentzN;
 | 
			
		||||
 | 
			
		||||
   return stream.str();
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) { 
 | 
			
		||||
   return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount);
 | 
			
		||||
 };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 ////////////////////////////////////////////////////////////
 | 
			
		||||
 // Helper to fill out metadata
 | 
			
		||||
 ////////////////////////////////////////////////////////////
 | 
			
		||||
 template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
 | 
			
		||||
					  FieldMetaData &header,
 | 
			
		||||
					  scidacRecord & _scidacRecord,
 | 
			
		||||
					  scidacFile   & _scidacFile) 
 | 
			
		||||
 {
 | 
			
		||||
   typedef typename getPrecision<vobj>::real_scalar_type stype;
 | 
			
		||||
 | 
			
		||||
   /////////////////////////////////////
 | 
			
		||||
   // Pull Grid's metadata
 | 
			
		||||
   /////////////////////////////////////
 | 
			
		||||
   PrepareMetaData(field,header);
 | 
			
		||||
 | 
			
		||||
   /////////////////////////////////////
 | 
			
		||||
   // Scidac Private File structure
 | 
			
		||||
   /////////////////////////////////////
 | 
			
		||||
   _scidacFile              = scidacFile(field._grid);
 | 
			
		||||
 | 
			
		||||
   /////////////////////////////////////
 | 
			
		||||
   // Scidac Private Record structure
 | 
			
		||||
   /////////////////////////////////////
 | 
			
		||||
   scidacRecord sr;
 | 
			
		||||
   sr.datatype   = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount);
 | 
			
		||||
   sr.date       = header.creation_date;
 | 
			
		||||
   sr.precision  = ScidacWordMnemonic<stype>();
 | 
			
		||||
   sr.recordtype = GRID_IO_FIELD;
 | 
			
		||||
 | 
			
		||||
   _scidacRecord = sr;
 | 
			
		||||
 | 
			
		||||
   //   std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 ///////////////////////////////////////////////////////
 | 
			
		||||
 // Scidac checksum
 | 
			
		||||
 ///////////////////////////////////////////////////////
 | 
			
		||||
 static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb)
 | 
			
		||||
 {
 | 
			
		||||
   uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
 | 
			
		||||
   uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
 | 
			
		||||
   if ( scidac_csuma !=scidac_checksuma) return 0;
 | 
			
		||||
   if ( scidac_csumb !=scidac_checksumb) return 0;
 | 
			
		||||
   return 1;
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Lime, ILDG and Scidac I/O classes
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
class GridLimeReader : public BinaryIO {
 | 
			
		||||
 public:
 | 
			
		||||
   ///////////////////////////////////////////////////
 | 
			
		||||
   // FIXME: format for RNG? Now just binary out instead
 | 
			
		||||
   ///////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
   FILE       *File;
 | 
			
		||||
   LimeReader *LimeR;
 | 
			
		||||
   std::string filename;
 | 
			
		||||
 | 
			
		||||
   /////////////////////////////////////////////
 | 
			
		||||
   // Open the file
 | 
			
		||||
   /////////////////////////////////////////////
 | 
			
		||||
   void open(const std::string &_filename) 
 | 
			
		||||
   {
 | 
			
		||||
     filename= _filename;
 | 
			
		||||
     File = fopen(filename.c_str(), "r");
 | 
			
		||||
     if (File == nullptr)
 | 
			
		||||
     {
 | 
			
		||||
       std::cerr << "cannot open file '" << filename << "'" << std::endl;
 | 
			
		||||
       abort();
 | 
			
		||||
     }
 | 
			
		||||
     LimeR = limeCreateReader(File);
 | 
			
		||||
   }
 | 
			
		||||
   /////////////////////////////////////////////
 | 
			
		||||
   // Close the file
 | 
			
		||||
   /////////////////////////////////////////////
 | 
			
		||||
   void close(void){
 | 
			
		||||
     fclose(File);
 | 
			
		||||
     //     limeDestroyReader(LimeR);
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  // Read a generic lattice field and verify checksum
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
  void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
    scidacChecksum scidacChecksum_;
 | 
			
		||||
    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
 | 
			
		||||
    std::string format = getFormatString<vobj>();
 | 
			
		||||
 | 
			
		||||
    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { 
 | 
			
		||||
 | 
			
		||||
      uint64_t file_bytes =limeReaderBytes(LimeR);
 | 
			
		||||
 | 
			
		||||
      //      std::cout << GridLogMessage << limeReaderType(LimeR) << " "<< file_bytes <<" bytes "<<std::endl;
 | 
			
		||||
      //      std::cout << GridLogMessage<< " readLimeObject seeking "<<  record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
 | 
			
		||||
 | 
			
		||||
      if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) {
 | 
			
		||||
 | 
			
		||||
	//	std::cout << GridLogMessage<< " readLimeLatticeBinaryObject matches ! " <<std::endl;
 | 
			
		||||
 | 
			
		||||
	uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
 | 
			
		||||
 | 
			
		||||
	//	std::cout << "R sizeof(sobj)= " <<sizeof(sobj)<<std::endl;
 | 
			
		||||
	//	std::cout << "R Gsites " <<field._grid->_gsites<<std::endl;
 | 
			
		||||
	//	std::cout << "R Payload expected " <<PayloadSize<<std::endl;
 | 
			
		||||
	//	std::cout << "R file size " <<file_bytes <<std::endl;
 | 
			
		||||
 | 
			
		||||
	assert(PayloadSize == file_bytes);// Must match or user error
 | 
			
		||||
 | 
			
		||||
	uint64_t offset= ftello(File);
 | 
			
		||||
	//	std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
 | 
			
		||||
	BinarySimpleMunger<sobj,sobj> munge;
 | 
			
		||||
	BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
	/////////////////////////////////////////////
 | 
			
		||||
	// Insist checksum is next record
 | 
			
		||||
	/////////////////////////////////////////////
 | 
			
		||||
	readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
 | 
			
		||||
 | 
			
		||||
	/////////////////////////////////////////////
 | 
			
		||||
	// Verify checksums
 | 
			
		||||
	/////////////////////////////////////////////
 | 
			
		||||
	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  // Read a generic serialisable object
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  void readLimeObject(std::string &xmlstring,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    // should this be a do while; can we miss a first record??
 | 
			
		||||
    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { 
 | 
			
		||||
 | 
			
		||||
      //      std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
 | 
			
		||||
      uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
 | 
			
		||||
 | 
			
		||||
      if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) {
 | 
			
		||||
 | 
			
		||||
	//	std::cout << GridLogMessage<< " readLimeObject matches ! " << record_name <<std::endl;
 | 
			
		||||
	std::vector<char> xmlc(nbytes+1,'\0');
 | 
			
		||||
	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);    
 | 
			
		||||
	//	std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
 | 
			
		||||
 | 
			
		||||
   xmlstring = std::string(&xmlc[0]);
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }  
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class serialisable_object>
 | 
			
		||||
  void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    std::string xmlstring;
 | 
			
		||||
 | 
			
		||||
    readLimeObject(xmlstring, record_name);
 | 
			
		||||
	  XmlReader RD(xmlstring, true, "");
 | 
			
		||||
	  read(RD,object_name,object);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class GridLimeWriter : public BinaryIO 
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
   ///////////////////////////////////////////////////
 | 
			
		||||
   // FIXME: format for RNG? Now just binary out instead
 | 
			
		||||
   // FIXME: collective calls or not ?
 | 
			
		||||
   //      : must know if I am the I/O boss
 | 
			
		||||
   ///////////////////////////////////////////////////
 | 
			
		||||
   FILE       *File;
 | 
			
		||||
   LimeWriter *LimeW;
 | 
			
		||||
   std::string filename;
 | 
			
		||||
   bool        boss_node;
 | 
			
		||||
   GridLimeWriter( bool isboss = true) {
 | 
			
		||||
     boss_node = isboss;
 | 
			
		||||
   }
 | 
			
		||||
   void open(const std::string &_filename) { 
 | 
			
		||||
     filename= _filename;
 | 
			
		||||
     if ( boss_node ) {
 | 
			
		||||
       File = fopen(filename.c_str(), "w");
 | 
			
		||||
       LimeW = limeCreateWriter(File); assert(LimeW != NULL );
 | 
			
		||||
     }
 | 
			
		||||
   }
 | 
			
		||||
   /////////////////////////////////////////////
 | 
			
		||||
   // Close the file
 | 
			
		||||
   /////////////////////////////////////////////
 | 
			
		||||
   void close(void) {
 | 
			
		||||
     if ( boss_node ) {
 | 
			
		||||
       fclose(File);
 | 
			
		||||
     }
 | 
			
		||||
     //  limeDestroyWriter(LimeW);
 | 
			
		||||
   }
 | 
			
		||||
  ///////////////////////////////////////////////////////
 | 
			
		||||
  // Lime utility functions
 | 
			
		||||
  ///////////////////////////////////////////////////////
 | 
			
		||||
  int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
 | 
			
		||||
  {
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      LimeRecordHeader *h;
 | 
			
		||||
      h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
 | 
			
		||||
      assert(limeWriteRecordHeader(h, LimeW) >= 0);
 | 
			
		||||
      limeDestroyHeader(h);
 | 
			
		||||
    }
 | 
			
		||||
    return LIME_SUCCESS;
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  // Write a generic serialisable object
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      std::string xmlstring = writer.docString();
 | 
			
		||||
 | 
			
		||||
      //    std::cout << "WriteLimeObject" << record_name <<std::endl;
 | 
			
		||||
      uint64_t nbytes = xmlstring.size();
 | 
			
		||||
      //    std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
 | 
			
		||||
      int err;
 | 
			
		||||
      LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes); 
 | 
			
		||||
      assert(h!= NULL);
 | 
			
		||||
      
 | 
			
		||||
      err=limeWriteRecordHeader(h, LimeW);                    assert(err>=0);
 | 
			
		||||
      err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
 | 
			
		||||
      err=limeWriterCloseRecord(LimeW);                       assert(err>=0);
 | 
			
		||||
      limeDestroyHeader(h);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class serialisable_object>
 | 
			
		||||
  void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
 | 
			
		||||
  {
 | 
			
		||||
    XmlWriter WR("","");
 | 
			
		||||
 | 
			
		||||
    if (scientificPrec)
 | 
			
		||||
    {
 | 
			
		||||
      WR.scientificFormat(true);
 | 
			
		||||
      WR.setPrecision(scientificPrec);
 | 
			
		||||
    }
 | 
			
		||||
    write(WR,object_name,object);
 | 
			
		||||
    writeLimeObject(MB, ME, WR, object_name, record_name);
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  // Write a generic lattice field and csum
 | 
			
		||||
  // This routine is Collectively called by all nodes
 | 
			
		||||
  // in communicator used by the field._grid
 | 
			
		||||
  ////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
  void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
 | 
			
		||||
  {
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // NB: FILE and iostream are jointly writing disjoint sequences in the
 | 
			
		||||
    // the same file through different file handles (integer units).
 | 
			
		||||
    // 
 | 
			
		||||
    // These are both buffered, so why I think this code is right is as follows.
 | 
			
		||||
    //
 | 
			
		||||
    // i)  write record header to FILE *File, telegraphing the size; flush
 | 
			
		||||
    // ii) ftello reads the offset from FILE *File . 
 | 
			
		||||
    // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
 | 
			
		||||
    //      Closes iostream and flushes.
 | 
			
		||||
    // iv) fseek on FILE * to end of this disjoint section.
 | 
			
		||||
    //  v) Continue writing scidac record.
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////
 | 
			
		||||
    
 | 
			
		||||
    GridBase *grid = field._grid;
 | 
			
		||||
    assert(boss_node == field._grid->IsBoss() );
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////
 | 
			
		||||
    // Create record header
 | 
			
		||||
    ////////////////////////////////////////////
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
    int err;
 | 
			
		||||
    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
    uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      createLimeRecordHeader(record_name, 0, 0, PayloadSize);
 | 
			
		||||
      fflush(File);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    //    std::cout << "W sizeof(sobj)"      <<sizeof(sobj)<<std::endl;
 | 
			
		||||
    //    std::cout << "W Gsites "           <<field._grid->_gsites<<std::endl;
 | 
			
		||||
    //    std::cout << "W Payload expected " <<PayloadSize<<std::endl;
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////
 | 
			
		||||
    // Check all nodes agree on file position
 | 
			
		||||
    ////////////////////////////////////////////////
 | 
			
		||||
    uint64_t offset1;
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      offset1 = ftello(File);    
 | 
			
		||||
    }
 | 
			
		||||
    grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    // The above is collective. Write by other means into the binary record
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    std::string format = getFormatString<vobj>();
 | 
			
		||||
    BinarySimpleMunger<sobj,sobj> munge;
 | 
			
		||||
    BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    // Wind forward and close the record
 | 
			
		||||
    ///////////////////////////////////////////
 | 
			
		||||
    if ( boss_node ) {
 | 
			
		||||
      fseek(File,0,SEEK_END);             
 | 
			
		||||
      uint64_t offset2 = ftello(File);     //    std::cout << " now at offset "<<offset2 << std::endl;
 | 
			
		||||
      assert( (offset2-offset1) == PayloadSize);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    // Check MPI-2 I/O did what we expect to file
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    if ( boss_node ) { 
 | 
			
		||||
      err=limeWriterCloseRecord(LimeW);  assert(err>=0);
 | 
			
		||||
    }
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    // Write checksum element, propagaing forward from the BinaryIO
 | 
			
		||||
    // Always pair a checksum with a binary object, and close message
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    scidacChecksum checksum;
 | 
			
		||||
    std::stringstream streama; streama << std::hex << scidac_csuma;
 | 
			
		||||
    std::stringstream streamb; streamb << std::hex << scidac_csumb;
 | 
			
		||||
    checksum.suma= streama.str();
 | 
			
		||||
    checksum.sumb= streamb.str();
 | 
			
		||||
    if ( boss_node ) { 
 | 
			
		||||
      writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class ScidacWriter : public GridLimeWriter {
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss)  { };
 | 
			
		||||
 | 
			
		||||
  template<class SerialisableUserFile>
 | 
			
		||||
  void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
 | 
			
		||||
  {
 | 
			
		||||
    scidacFile    _scidacFile(grid);
 | 
			
		||||
    if ( this->boss_node ) {
 | 
			
		||||
      writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
 | 
			
		||||
      writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  // Write generic lattice field in scidac format
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  template <class vobj, class userRecord>
 | 
			
		||||
  void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
 | 
			
		||||
                              const unsigned int recordScientificPrec = 0) 
 | 
			
		||||
  {
 | 
			
		||||
    GridBase * grid = field._grid;
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    // fill the Grid header
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    FieldMetaData header;
 | 
			
		||||
    scidacRecord  _scidacRecord;
 | 
			
		||||
    scidacFile    _scidacFile;
 | 
			
		||||
 | 
			
		||||
    ScidacMetaData(field,header,_scidacRecord,_scidacFile);
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    // Fill the Lime file record by record
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    if ( this->boss_node ) {
 | 
			
		||||
      writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message 
 | 
			
		||||
      writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
 | 
			
		||||
      writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
 | 
			
		||||
    }
 | 
			
		||||
    // Collective call
 | 
			
		||||
    writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));      // Closes message with checksum
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ScidacReader : public GridLimeReader {
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
   template<class SerialisableUserFile>
 | 
			
		||||
   void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
 | 
			
		||||
   {
 | 
			
		||||
     scidacFile    _scidacFile(grid);
 | 
			
		||||
     readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
 | 
			
		||||
     readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
 | 
			
		||||
   }
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  // Write generic lattice field in scidac format
 | 
			
		||||
  ////////////////////////////////////////////////
 | 
			
		||||
  template <class vobj, class userRecord>
 | 
			
		||||
  void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord) 
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
    GridBase * grid = field._grid;
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    // fill the Grid header
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    FieldMetaData header;
 | 
			
		||||
    scidacRecord  _scidacRecord;
 | 
			
		||||
    scidacFile    _scidacFile;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    // Fill the Lime file record by record
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message 
 | 
			
		||||
    readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
 | 
			
		||||
    readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
 | 
			
		||||
    readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));
 | 
			
		||||
  }
 | 
			
		||||
  void skipPastBinaryRecord(void) {
 | 
			
		||||
    std::string rec_name(ILDG_BINARY_DATA);
 | 
			
		||||
    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { 
 | 
			
		||||
      if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) )  ) {
 | 
			
		||||
	skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
    }    
 | 
			
		||||
  }
 | 
			
		||||
  void skipPastObjectRecord(std::string rec_name) {
 | 
			
		||||
    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { 
 | 
			
		||||
      if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) )  ) {
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  void skipScidacFieldRecord() {
 | 
			
		||||
    skipPastObjectRecord(std::string(GRID_FORMAT));
 | 
			
		||||
    skipPastObjectRecord(std::string(SCIDAC_RECORD_XML));
 | 
			
		||||
    skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML));
 | 
			
		||||
    skipPastBinaryRecord();
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class IldgWriter : public ScidacWriter {
 | 
			
		||||
 public:
 | 
			
		||||
  
 | 
			
		||||
  IldgWriter(bool isboss) : ScidacWriter(isboss) {};
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////
 | 
			
		||||
  // A little helper
 | 
			
		||||
  ///////////////////////////////////
 | 
			
		||||
  void writeLimeIldgLFN(std::string &LFN)
 | 
			
		||||
  {
 | 
			
		||||
    uint64_t PayloadSize = LFN.size();
 | 
			
		||||
    int err;
 | 
			
		||||
    createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize);
 | 
			
		||||
    err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0);
 | 
			
		||||
    err=limeWriterCloseRecord(LimeW); assert(err>=0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Special ILDG operations ; gauge configs only.
 | 
			
		||||
  // Don't require scidac records EXCEPT checksum
 | 
			
		||||
  // Use Grid MetaData object if present.
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  template <class vsimd>
 | 
			
		||||
  void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description) 
 | 
			
		||||
  {
 | 
			
		||||
    GridBase * grid = Umu._grid;
 | 
			
		||||
    typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
 | 
			
		||||
    typedef iLorentzColourMatrix<vsimd> vobj;
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    // fill the Grid header
 | 
			
		||||
    ////////////////////////////////////////
 | 
			
		||||
    FieldMetaData header;
 | 
			
		||||
    scidacRecord  _scidacRecord;
 | 
			
		||||
    scidacFile    _scidacFile;
 | 
			
		||||
 | 
			
		||||
    ScidacMetaData(Umu,header,_scidacRecord,_scidacFile);
 | 
			
		||||
 | 
			
		||||
    std::string format = header.floating_point;
 | 
			
		||||
    header.ensemble_id    = description;
 | 
			
		||||
    header.ensemble_label = description;
 | 
			
		||||
    header.sequence_number = sequence;
 | 
			
		||||
    header.ildg_lfn = LFN;
 | 
			
		||||
 | 
			
		||||
    assert ( (format == std::string("IEEE32BIG"))  
 | 
			
		||||
           ||(format == std::string("IEEE64BIG")) );
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    // Fill ILDG header data struct
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    ildgFormat ildgfmt ;
 | 
			
		||||
    ildgfmt.field     = std::string("su3gauge");
 | 
			
		||||
 | 
			
		||||
    if ( format == std::string("IEEE32BIG") ) { 
 | 
			
		||||
      ildgfmt.precision = 32;
 | 
			
		||||
    } else { 
 | 
			
		||||
      ildgfmt.precision = 64;
 | 
			
		||||
    }
 | 
			
		||||
    ildgfmt.version = 1.0;
 | 
			
		||||
    ildgfmt.lx = header.dimension[0];
 | 
			
		||||
    ildgfmt.ly = header.dimension[1];
 | 
			
		||||
    ildgfmt.lz = header.dimension[2];
 | 
			
		||||
    ildgfmt.lt = header.dimension[3];
 | 
			
		||||
    assert(header.nd==4);
 | 
			
		||||
    assert(header.nd==header.dimension.size());
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Fill the USQCD info field
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    usqcdInfo info;
 | 
			
		||||
    info.version=1.0;
 | 
			
		||||
    info.plaq   = header.plaquette;
 | 
			
		||||
    info.linktr = header.link_trace;
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl;
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    // Fill the Lime file record by record
 | 
			
		||||
    //////////////////////////////////////////////
 | 
			
		||||
    writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message 
 | 
			
		||||
    writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
 | 
			
		||||
    writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
 | 
			
		||||
    writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
 | 
			
		||||
    writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
 | 
			
		||||
    writeLimeObject(0,0,ildgfmt,std::string("ildgFormat")   ,std::string(ILDG_FORMAT)); // rec
 | 
			
		||||
    writeLimeIldgLFN(header.ildg_lfn);                                                 // rec
 | 
			
		||||
    writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA));      // Closes message with checksum
 | 
			
		||||
    //    limeDestroyWriter(LimeW);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class IldgReader : public GridLimeReader {
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Read either Grid/SciDAC/ILDG configuration
 | 
			
		||||
  // Don't require scidac records EXCEPT checksum
 | 
			
		||||
  // Use Grid MetaData object if present.
 | 
			
		||||
  // Else use ILDG MetaData object if present.
 | 
			
		||||
  // Else use SciDAC MetaData object if present.
 | 
			
		||||
  ////////////////////////////////////////////////////////////////
 | 
			
		||||
  template <class vsimd>
 | 
			
		||||
  void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) {
 | 
			
		||||
 | 
			
		||||
    typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
 | 
			
		||||
    typedef typename GaugeField::vector_object  vobj;
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
 | 
			
		||||
    typedef LorentzColourMatrixF fobj;
 | 
			
		||||
    typedef LorentzColourMatrixD dobj;
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
    std::vector<int> dims = Umu._grid->FullDimensions();
 | 
			
		||||
 | 
			
		||||
    assert(dims.size()==4);
 | 
			
		||||
 | 
			
		||||
    // Metadata holders
 | 
			
		||||
    ildgFormat     ildgFormat_    ;
 | 
			
		||||
    std::string    ildgLFN_       ;
 | 
			
		||||
    scidacChecksum scidacChecksum_; 
 | 
			
		||||
    usqcdInfo      usqcdInfo_     ;
 | 
			
		||||
 | 
			
		||||
    // track what we read from file
 | 
			
		||||
    int found_ildgFormat    =0;
 | 
			
		||||
    int found_ildgLFN       =0;
 | 
			
		||||
    int found_scidacChecksum=0;
 | 
			
		||||
    int found_usqcdInfo     =0;
 | 
			
		||||
    int found_ildgBinary =0;
 | 
			
		||||
    int found_FieldMetaData =0;
 | 
			
		||||
 | 
			
		||||
    uint32_t nersc_csum;
 | 
			
		||||
    uint32_t scidac_csuma;
 | 
			
		||||
    uint32_t scidac_csumb;
 | 
			
		||||
 | 
			
		||||
    // Binary format
 | 
			
		||||
    std::string format;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Loop over all records
 | 
			
		||||
    // -- Order is poorly guaranteed except ILDG header preceeds binary section.
 | 
			
		||||
    // -- Run like an event loop.
 | 
			
		||||
    // -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing
 | 
			
		||||
    //    that Scidac. 
 | 
			
		||||
    // -- Insist on Scidac checksum record.
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { 
 | 
			
		||||
 | 
			
		||||
      uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
 | 
			
		||||
      
 | 
			
		||||
      //////////////////////////////////////////////////////////////////
 | 
			
		||||
      // If not BINARY_DATA read a string and parse
 | 
			
		||||
      //////////////////////////////////////////////////////////////////
 | 
			
		||||
      if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) )  ) {
 | 
			
		||||
	
 | 
			
		||||
	// Copy out the string
 | 
			
		||||
	std::vector<char> xmlc(nbytes+1,'\0');
 | 
			
		||||
	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);    
 | 
			
		||||
	//	std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl;
 | 
			
		||||
 | 
			
		||||
	//////////////////////////////////
 | 
			
		||||
	// ILDG format record
 | 
			
		||||
 | 
			
		||||
  std::string xmlstring(&xmlc[0]);
 | 
			
		||||
	if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) { 
 | 
			
		||||
 | 
			
		||||
	  XmlReader RD(xmlstring, true, "");
 | 
			
		||||
	  read(RD,"ildgFormat",ildgFormat_);
 | 
			
		||||
 | 
			
		||||
	  if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
 | 
			
		||||
	  if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG");
 | 
			
		||||
 | 
			
		||||
	  assert( ildgFormat_.lx == dims[0]);
 | 
			
		||||
	  assert( ildgFormat_.ly == dims[1]);
 | 
			
		||||
	  assert( ildgFormat_.lz == dims[2]);
 | 
			
		||||
	  assert( ildgFormat_.lt == dims[3]);
 | 
			
		||||
 | 
			
		||||
	  found_ildgFormat = 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
 | 
			
		||||
	  FieldMetaData_.ildg_lfn = xmlstring;
 | 
			
		||||
	  found_ildgLFN = 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) { 
 | 
			
		||||
 | 
			
		||||
	  XmlReader RD(xmlstring, true, "");
 | 
			
		||||
	  read(RD,"FieldMetaData",FieldMetaData_);
 | 
			
		||||
 | 
			
		||||
	  format = FieldMetaData_.floating_point;
 | 
			
		||||
 | 
			
		||||
	  assert(FieldMetaData_.dimension[0] == dims[0]);
 | 
			
		||||
	  assert(FieldMetaData_.dimension[1] == dims[1]);
 | 
			
		||||
	  assert(FieldMetaData_.dimension[2] == dims[2]);
 | 
			
		||||
	  assert(FieldMetaData_.dimension[3] == dims[3]);
 | 
			
		||||
 | 
			
		||||
	  found_FieldMetaData = 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) { 
 | 
			
		||||
	  // is it a USQCD info field
 | 
			
		||||
	  if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) { 
 | 
			
		||||
	    //	    std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
 | 
			
		||||
	    XmlReader RD(xmlstring, true, "");
 | 
			
		||||
	    read(RD,"usqcdInfo",usqcdInfo_);
 | 
			
		||||
	    found_usqcdInfo = 1;
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) { 
 | 
			
		||||
	  XmlReader RD(xmlstring, true, "");
 | 
			
		||||
	  read(RD,"scidacChecksum",scidacChecksum_);
 | 
			
		||||
	  found_scidacChecksum = 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
      } else {  
 | 
			
		||||
	/////////////////////////////////
 | 
			
		||||
	// Binary data
 | 
			
		||||
	/////////////////////////////////
 | 
			
		||||
	std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl;
 | 
			
		||||
	uint64_t offset= ftello(File);
 | 
			
		||||
	if ( format == std::string("IEEE64BIG") ) {
 | 
			
		||||
	  GaugeSimpleMunger<dobj, sobj> munge;
 | 
			
		||||
	  BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
	} else { 
 | 
			
		||||
	  GaugeSimpleMunger<fobj, sobj> munge;
 | 
			
		||||
	  BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	found_ildgBinary = 1;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    // Minimally must find binary segment and checksum
 | 
			
		||||
    // Since this is an ILDG reader require ILDG format
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    assert(found_ildgBinary);
 | 
			
		||||
    assert(found_ildgFormat);
 | 
			
		||||
    assert(found_scidacChecksum);
 | 
			
		||||
 | 
			
		||||
    // Must find something with the lattice dimensions
 | 
			
		||||
    assert(found_FieldMetaData||found_ildgFormat);
 | 
			
		||||
 | 
			
		||||
    if ( found_FieldMetaData ) {
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl;
 | 
			
		||||
 | 
			
		||||
    } else { 
 | 
			
		||||
 | 
			
		||||
      assert(found_ildgFormat);
 | 
			
		||||
      assert ( ildgFormat_.field == std::string("su3gauge") );
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
      // Populate our Grid metadata as best we can
 | 
			
		||||
      ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
      std::ostringstream vers; vers << ildgFormat_.version;
 | 
			
		||||
      FieldMetaData_.hdr_version = vers.str();
 | 
			
		||||
      FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3");
 | 
			
		||||
 | 
			
		||||
      FieldMetaData_.nd=4;
 | 
			
		||||
      FieldMetaData_.dimension.resize(4);
 | 
			
		||||
 | 
			
		||||
      FieldMetaData_.dimension[0] = ildgFormat_.lx ;
 | 
			
		||||
      FieldMetaData_.dimension[1] = ildgFormat_.ly ;
 | 
			
		||||
      FieldMetaData_.dimension[2] = ildgFormat_.lz ;
 | 
			
		||||
      FieldMetaData_.dimension[3] = ildgFormat_.lt ;
 | 
			
		||||
 | 
			
		||||
      if ( found_usqcdInfo ) { 
 | 
			
		||||
	FieldMetaData_.plaquette = usqcdInfo_.plaq;
 | 
			
		||||
	FieldMetaData_.link_trace= usqcdInfo_.linktr;
 | 
			
		||||
	std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl;
 | 
			
		||||
	std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl;
 | 
			
		||||
      } else { 
 | 
			
		||||
	FieldMetaData_.plaquette = 0.0;
 | 
			
		||||
	FieldMetaData_.link_trace= 0.0;
 | 
			
		||||
	std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    // Really really want to mandate a scidac checksum
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
    if ( found_scidacChecksum ) {
 | 
			
		||||
      FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
 | 
			
		||||
      FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
 | 
			
		||||
      scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
 | 
			
		||||
      assert( scidac_csuma ==FieldMetaData_.scidac_checksuma);
 | 
			
		||||
      assert( scidac_csumb ==FieldMetaData_.scidac_checksumb);
 | 
			
		||||
      std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl;
 | 
			
		||||
    } else { 
 | 
			
		||||
      std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl;
 | 
			
		||||
      assert(0); // Can I insist always checksum ?
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if ( found_FieldMetaData || found_usqcdInfo ) {
 | 
			
		||||
      FieldMetaData checker;
 | 
			
		||||
      GaugeStatistics(Umu,checker);
 | 
			
		||||
      assert(fabs(checker.plaquette  - FieldMetaData_.plaquette )<1.0e-5);
 | 
			
		||||
      assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5);
 | 
			
		||||
      std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 };
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
//HAVE_LIME
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,237 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/parallelIO/IldgIO.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_ILDGTYPES_IO_H
 | 
			
		||||
#define GRID_ILDGTYPES_IO_H
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_LIME
 | 
			
		||||
extern "C" { // for linkage
 | 
			
		||||
#include "lime.h"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Data representation of records that enter ILDG and SciDac formats
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
#define GRID_FORMAT      "grid-format"
 | 
			
		||||
#define ILDG_FORMAT      "ildg-format"
 | 
			
		||||
#define ILDG_BINARY_DATA "ildg-binary-data"
 | 
			
		||||
#define ILDG_DATA_LFN    "ildg-data-lfn"
 | 
			
		||||
#define SCIDAC_CHECKSUM           "scidac-checksum"
 | 
			
		||||
#define SCIDAC_PRIVATE_FILE_XML   "scidac-private-file-xml"
 | 
			
		||||
#define SCIDAC_FILE_XML           "scidac-file-xml"
 | 
			
		||||
#define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml"
 | 
			
		||||
#define SCIDAC_RECORD_XML         "scidac-record-xml"
 | 
			
		||||
#define SCIDAC_BINARY_DATA        "scidac-binary-data"
 | 
			
		||||
// Unused SCIDAC records names; could move to support this functionality
 | 
			
		||||
#define SCIDAC_SITELIST           "scidac-sitelist"
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat
 | 
			
		||||
  const int GRID_IO_MULTIFILE  = 1; // hardcode lift from QIO compat
 | 
			
		||||
  const int GRID_IO_FIELD      = 0; // hardcode lift from QIO compat
 | 
			
		||||
  const int GRID_IO_GLOBAL     = 1; // hardcode lift from QIO compat
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// QIO uses mandatory "private" records fixed format
 | 
			
		||||
// Private is in principle "opaque" however it can't be changed now because that would break existing 
 | 
			
		||||
// file compatability, so should be correct to assume the undocumented but defacto file structure.
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
struct emptyUserRecord : Serializable { 
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy);
 | 
			
		||||
  emptyUserRecord() { dummy=0; };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////
 | 
			
		||||
// Scidac private file xml
 | 
			
		||||
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
 | 
			
		||||
////////////////////////
 | 
			
		||||
struct scidacFile : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile,
 | 
			
		||||
                                  double, version,
 | 
			
		||||
                                  int, spacetime,
 | 
			
		||||
				  std::string, dims, // must convert to int
 | 
			
		||||
                                  int, volfmt);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> getDimensions(void) { 
 | 
			
		||||
    std::stringstream stream(dims);
 | 
			
		||||
    std::vector<int> dimensions;
 | 
			
		||||
    int n;
 | 
			
		||||
    while(stream >> n){
 | 
			
		||||
      dimensions.push_back(n);
 | 
			
		||||
    }
 | 
			
		||||
    return dimensions;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void setDimensions(std::vector<int> dimensions) { 
 | 
			
		||||
    char delimiter = ' ';
 | 
			
		||||
    std::stringstream stream;
 | 
			
		||||
    for(int i=0;i<dimensions.size();i++){ 
 | 
			
		||||
      stream << dimensions[i];
 | 
			
		||||
      if ( i != dimensions.size()-1) { 
 | 
			
		||||
	stream << delimiter <<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    dims = stream.str();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Constructor provides Grid
 | 
			
		||||
  scidacFile() =default; // default constructor
 | 
			
		||||
  scidacFile(GridBase * grid){
 | 
			
		||||
    version      = 1.0;
 | 
			
		||||
    spacetime    = grid->_ndimension;
 | 
			
		||||
    setDimensions(grid->FullDimensions()); 
 | 
			
		||||
    volfmt       = GRID_IO_SINGLEFILE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////
 | 
			
		||||
// scidac-private-record-xml : example
 | 
			
		||||
// <scidacRecord>
 | 
			
		||||
// <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype>
 | 
			
		||||
// <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins>
 | 
			
		||||
// <typesize>144</typesize><datacount>4</datacount>
 | 
			
		||||
// </scidacRecord>
 | 
			
		||||
///////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
struct scidacRecord : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord,
 | 
			
		||||
                                  double, version,
 | 
			
		||||
                                  std::string, date,
 | 
			
		||||
				  int, recordtype,
 | 
			
		||||
				  std::string, datatype,
 | 
			
		||||
				  std::string, precision,
 | 
			
		||||
				  int, colors,
 | 
			
		||||
				  int, spins,
 | 
			
		||||
				  int, typesize,
 | 
			
		||||
				  int, datacount);
 | 
			
		||||
 | 
			
		||||
  scidacRecord()
 | 
			
		||||
  : version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
 | 
			
		||||
  {}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////
 | 
			
		||||
// ILDG format
 | 
			
		||||
////////////////////////
 | 
			
		||||
struct ildgFormat : Serializable {
 | 
			
		||||
public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat,
 | 
			
		||||
				  double, version,
 | 
			
		||||
				  std::string, field,
 | 
			
		||||
				  int, precision,
 | 
			
		||||
				  int, lx,
 | 
			
		||||
				  int, ly,
 | 
			
		||||
				  int, lz,
 | 
			
		||||
				  int, lt);
 | 
			
		||||
  ildgFormat() { version=1.0; };
 | 
			
		||||
};
 | 
			
		||||
////////////////////////
 | 
			
		||||
// USQCD info
 | 
			
		||||
////////////////////////
 | 
			
		||||
struct usqcdInfo : Serializable { 
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo,
 | 
			
		||||
				  double, version,
 | 
			
		||||
				  double, plaq,
 | 
			
		||||
				  double, linktr,
 | 
			
		||||
				  std::string, info);
 | 
			
		||||
  usqcdInfo() { 
 | 
			
		||||
    version=1.0; 
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
////////////////////////
 | 
			
		||||
// Scidac Checksum
 | 
			
		||||
////////////////////////
 | 
			
		||||
struct scidacChecksum : Serializable { 
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum,
 | 
			
		||||
				  double, version,
 | 
			
		||||
				  std::string, suma,
 | 
			
		||||
				  std::string, sumb);
 | 
			
		||||
  scidacChecksum() { 
 | 
			
		||||
    version=1.0; 
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Type:           scidac-file-xml         <title>MILC ILDG archival gauge configuration</title>
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Type:           
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
////////////////////////
 | 
			
		||||
// Scidac private file xml 
 | 
			
		||||
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile> 
 | 
			
		||||
////////////////////////                                                                                                                                                                              
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
struct usqcdPropFile : Serializable { 
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile,
 | 
			
		||||
				  double, version,
 | 
			
		||||
				  std::string, type,
 | 
			
		||||
				  std::string, info);
 | 
			
		||||
  usqcdPropFile() { 
 | 
			
		||||
    version=1.0; 
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
struct usqcdSourceInfo : Serializable { 
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo,
 | 
			
		||||
				  double, version,
 | 
			
		||||
				  std::string, info);
 | 
			
		||||
  usqcdSourceInfo() { 
 | 
			
		||||
    version=1.0; 
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
struct usqcdPropInfo : Serializable { 
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo,
 | 
			
		||||
				  double, version,
 | 
			
		||||
				  int, spin,
 | 
			
		||||
				  int, color,
 | 
			
		||||
				  std::string, info);
 | 
			
		||||
  usqcdPropInfo() { 
 | 
			
		||||
    version=1.0; 
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,327 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/parallelIO/NerscIO.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <iomanip>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <map>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/utsname.h>
 | 
			
		||||
#include <pwd.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////
 | 
			
		||||
  // Precision mapping
 | 
			
		||||
  ///////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj> static std::string getFormatString (void)
 | 
			
		||||
  {
 | 
			
		||||
    std::string format;
 | 
			
		||||
    typedef typename getPrecision<vobj>::real_scalar_type stype;
 | 
			
		||||
    if ( sizeof(stype) == sizeof(float) ) {
 | 
			
		||||
      format = std::string("IEEE32BIG");
 | 
			
		||||
    }
 | 
			
		||||
    if ( sizeof(stype) == sizeof(double) ) {
 | 
			
		||||
      format = std::string("IEEE64BIG");
 | 
			
		||||
    }
 | 
			
		||||
    return format;
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // header specification/interpretation
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    class FieldMetaData : Serializable {
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
      GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData,
 | 
			
		||||
				      int, nd,
 | 
			
		||||
				      std::vector<int>, dimension,
 | 
			
		||||
				      std::vector<std::string>, boundary,
 | 
			
		||||
				      int, data_start,
 | 
			
		||||
				      std::string, hdr_version,
 | 
			
		||||
				      std::string, storage_format,
 | 
			
		||||
				      double, link_trace,
 | 
			
		||||
				      double, plaquette,
 | 
			
		||||
				      uint32_t, checksum,
 | 
			
		||||
				      uint32_t, scidac_checksuma,
 | 
			
		||||
				      uint32_t, scidac_checksumb,
 | 
			
		||||
				      unsigned int, sequence_number,
 | 
			
		||||
				      std::string, data_type,
 | 
			
		||||
				      std::string, ensemble_id,
 | 
			
		||||
				      std::string, ensemble_label,
 | 
			
		||||
				      std::string, ildg_lfn,
 | 
			
		||||
				      std::string, creator,
 | 
			
		||||
				      std::string, creator_hardware,
 | 
			
		||||
				      std::string, creation_date,
 | 
			
		||||
				      std::string, archive_date,
 | 
			
		||||
				      std::string, floating_point);
 | 
			
		||||
      // WARNING: non-initialised values might lead to twisted parallel IO
 | 
			
		||||
      // issues, std::string are fine because they initliase to size 0
 | 
			
		||||
      // as per C++ standard.
 | 
			
		||||
      FieldMetaData(void) 
 | 
			
		||||
      : nd(4), dimension(4,0), boundary(4, ""), data_start(0),
 | 
			
		||||
      link_trace(0.), plaquette(0.), checksum(0),
 | 
			
		||||
      scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
 | 
			
		||||
      {}
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
 | 
			
		||||
    using namespace Grid;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Bit and Physical Checksumming and QA of data
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////
 | 
			
		||||
    inline void GridMetaData(GridBase *grid,FieldMetaData &header)
 | 
			
		||||
    {
 | 
			
		||||
      int nd = grid->_ndimension;
 | 
			
		||||
      header.nd = nd;
 | 
			
		||||
      header.dimension.resize(nd);
 | 
			
		||||
      header.boundary.resize(nd);
 | 
			
		||||
      header.data_start = 0;
 | 
			
		||||
      for(int d=0;d<nd;d++) {
 | 
			
		||||
	header.dimension[d] = grid->_fdimensions[d];
 | 
			
		||||
      }
 | 
			
		||||
      for(int d=0;d<nd;d++) {
 | 
			
		||||
	header.boundary[d] = std::string("PERIODIC");
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    inline void MachineCharacteristics(FieldMetaData &header)
 | 
			
		||||
    {
 | 
			
		||||
      // Who
 | 
			
		||||
      struct passwd *pw = getpwuid (getuid());
 | 
			
		||||
      if (pw) header.creator = std::string(pw->pw_name); 
 | 
			
		||||
 | 
			
		||||
      // When
 | 
			
		||||
      std::time_t t = std::time(nullptr);
 | 
			
		||||
      std::tm tm_ = *std::localtime(&t);
 | 
			
		||||
      std::ostringstream oss; 
 | 
			
		||||
      //      oss << std::put_time(&tm_, "%c %Z");
 | 
			
		||||
      header.creation_date = oss.str();
 | 
			
		||||
      header.archive_date  = header.creation_date;
 | 
			
		||||
 | 
			
		||||
      // What
 | 
			
		||||
      struct utsname name;  uname(&name);
 | 
			
		||||
      header.creator_hardware = std::string(name.nodename)+"-";
 | 
			
		||||
      header.creator_hardware+= std::string(name.machine)+"-";
 | 
			
		||||
      header.creator_hardware+= std::string(name.sysname)+"-";
 | 
			
		||||
      header.creator_hardware+= std::string(name.release);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#define dump_meta_data(field, s)					\
 | 
			
		||||
      s << "BEGIN_HEADER"      << std::endl;				\
 | 
			
		||||
      s << "HDR_VERSION = "    << field.hdr_version    << std::endl;	\
 | 
			
		||||
      s << "DATATYPE = "       << field.data_type      << std::endl;	\
 | 
			
		||||
      s << "STORAGE_FORMAT = " << field.storage_format << std::endl;	\
 | 
			
		||||
      for(int i=0;i<4;i++){						\
 | 
			
		||||
	s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
 | 
			
		||||
      }									\
 | 
			
		||||
      s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
 | 
			
		||||
      s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl; \
 | 
			
		||||
      for(int i=0;i<4;i++){						\
 | 
			
		||||
	s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;	\
 | 
			
		||||
      }									\
 | 
			
		||||
									\
 | 
			
		||||
      s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
 | 
			
		||||
      s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \
 | 
			
		||||
      s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \
 | 
			
		||||
      s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;	\
 | 
			
		||||
      s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;	\
 | 
			
		||||
      s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;	\
 | 
			
		||||
      s << "CREATOR = "         << field.creator          << std::endl;	\
 | 
			
		||||
      s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;	\
 | 
			
		||||
      s << "CREATION_DATE = "   << field.creation_date    << std::endl;	\
 | 
			
		||||
      s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;	\
 | 
			
		||||
      s << "FLOATING_POINT = "  << field.floating_point   << std::endl;	\
 | 
			
		||||
      s << "END_HEADER"         << std::endl;
 | 
			
		||||
 | 
			
		||||
template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid = field._grid;
 | 
			
		||||
  std::string format = getFormatString<vobj>();
 | 
			
		||||
   header.floating_point = format;
 | 
			
		||||
   header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
 | 
			
		||||
   GridMetaData(grid,header); 
 | 
			
		||||
   MachineCharacteristics(header);
 | 
			
		||||
 }
 | 
			
		||||
 inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header)
 | 
			
		||||
 {
 | 
			
		||||
   // How to convert data precision etc...
 | 
			
		||||
   header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplF>::linkTrace(data);
 | 
			
		||||
   header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplF>::avgPlaquette(data);
 | 
			
		||||
 }
 | 
			
		||||
 inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
 | 
			
		||||
 {
 | 
			
		||||
   // How to convert data precision etc...
 | 
			
		||||
   header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplD>::linkTrace(data);
 | 
			
		||||
   header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplD>::avgPlaquette(data);
 | 
			
		||||
 }
 | 
			
		||||
 template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header)
 | 
			
		||||
 {
 | 
			
		||||
   
 | 
			
		||||
   GridBase *grid = field._grid;
 | 
			
		||||
   std::string format = getFormatString<vLorentzColourMatrixF>();
 | 
			
		||||
   header.floating_point = format;
 | 
			
		||||
   header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
 | 
			
		||||
   GridMetaData(grid,header); 
 | 
			
		||||
   GaugeStatistics(field,header);
 | 
			
		||||
   MachineCharacteristics(header);
 | 
			
		||||
 }
 | 
			
		||||
 template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
 | 
			
		||||
 {
 | 
			
		||||
   GridBase *grid = field._grid;
 | 
			
		||||
   std::string format = getFormatString<vLorentzColourMatrixD>();
 | 
			
		||||
   header.floating_point = format;
 | 
			
		||||
   header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
 | 
			
		||||
   GridMetaData(grid,header); 
 | 
			
		||||
   GaugeStatistics(field,header);
 | 
			
		||||
   MachineCharacteristics(header);
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Utilities ; these are QCD aware
 | 
			
		||||
    //////////////////////////////////////////////////////////////////////
 | 
			
		||||
    inline void reconstruct3(LorentzColourMatrix & cm)
 | 
			
		||||
    {
 | 
			
		||||
      const int x=0;
 | 
			
		||||
      const int y=1;
 | 
			
		||||
      const int z=2;
 | 
			
		||||
      for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
	cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
 | 
			
		||||
	cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
 | 
			
		||||
	cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Some data types for intermediate storage
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >;
 | 
			
		||||
 | 
			
		||||
    typedef iLorentzColour2x3<Complex>  LorentzColour2x3;
 | 
			
		||||
    typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
 | 
			
		||||
    typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Simple classes for precision conversion
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template <class fobj, class sobj>
 | 
			
		||||
struct BinarySimpleUnmunger {
 | 
			
		||||
  typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
 | 
			
		||||
  typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
 | 
			
		||||
  
 | 
			
		||||
  void operator()(sobj &in, fobj &out) {
 | 
			
		||||
    // take word by word and transform accoding to the status
 | 
			
		||||
    fobj_stype *out_buffer = (fobj_stype *)&out;
 | 
			
		||||
    sobj_stype *in_buffer = (sobj_stype *)∈
 | 
			
		||||
    size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
 | 
			
		||||
    size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
 | 
			
		||||
    assert(fobj_words == sobj_words);
 | 
			
		||||
    
 | 
			
		||||
    for (unsigned int word = 0; word < sobj_words; word++)
 | 
			
		||||
      out_buffer[word] = in_buffer[word];  // type conversion on the fly
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class fobj, class sobj>
 | 
			
		||||
struct BinarySimpleMunger {
 | 
			
		||||
  typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
 | 
			
		||||
  typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
 | 
			
		||||
 | 
			
		||||
  void operator()(fobj &in, sobj &out) {
 | 
			
		||||
    // take word by word and transform accoding to the status
 | 
			
		||||
    fobj_stype *in_buffer = (fobj_stype *)∈
 | 
			
		||||
    sobj_stype *out_buffer = (sobj_stype *)&out;
 | 
			
		||||
    size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
 | 
			
		||||
    size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
 | 
			
		||||
    assert(fobj_words == sobj_words);
 | 
			
		||||
    
 | 
			
		||||
    for (unsigned int word = 0; word < sobj_words; word++)
 | 
			
		||||
      out_buffer[word] = in_buffer[word];  // type conversion on the fly
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    template<class fobj,class sobj>
 | 
			
		||||
    struct GaugeSimpleMunger{
 | 
			
		||||
      void operator()(fobj &in, sobj &out) {
 | 
			
		||||
        for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
          for (int i = 0; i < Nc; i++) {
 | 
			
		||||
          for (int j = 0; j < Nc; j++) {
 | 
			
		||||
	    out(mu)()(i, j) = in(mu)()(i, j);
 | 
			
		||||
	  }}
 | 
			
		||||
        }
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template <class fobj, class sobj>
 | 
			
		||||
    struct GaugeSimpleUnmunger {
 | 
			
		||||
 | 
			
		||||
      void operator()(sobj &in, fobj &out) {
 | 
			
		||||
        for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
          for (int i = 0; i < Nc; i++) {
 | 
			
		||||
          for (int j = 0; j < Nc; j++) {
 | 
			
		||||
	    out(mu)()(i, j) = in(mu)()(i, j);
 | 
			
		||||
	  }}
 | 
			
		||||
        }
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class fobj,class sobj>
 | 
			
		||||
    struct Gauge3x2munger{
 | 
			
		||||
      void operator() (fobj &in,sobj &out){
 | 
			
		||||
	for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
	  for(int i=0;i<2;i++){
 | 
			
		||||
	  for(int j=0;j<3;j++){
 | 
			
		||||
	    out(mu)()(i,j) = in(mu)(i)(j);
 | 
			
		||||
	  }}
 | 
			
		||||
	}
 | 
			
		||||
	reconstruct3(out);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class fobj,class sobj>
 | 
			
		||||
    struct Gauge3x2unmunger{
 | 
			
		||||
      void operator() (sobj &in,fobj &out){
 | 
			
		||||
	for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
	  for(int i=0;i<2;i++){
 | 
			
		||||
	  for(int j=0;j<3;j++){
 | 
			
		||||
	    out(mu)(i)(j) = in(mu)()(i,j);
 | 
			
		||||
	  }}
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -1,363 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/parallelIO/NerscIO.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
    Author: Matt Spraggs <matthew.spraggs@gmail.com>
 | 
			
		||||
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_NERSC_IO_H
 | 
			
		||||
#define GRID_NERSC_IO_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
 | 
			
		||||
    using namespace Grid;
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Write and read from fstream; comput header offset for payload
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    class NerscIO : public BinaryIO { 
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
      static inline void truncate(std::string file){
 | 
			
		||||
	std::ofstream fout(file,std::ios::out);
 | 
			
		||||
      }
 | 
			
		||||
  
 | 
			
		||||
      static inline unsigned int writeHeader(FieldMetaData &field,std::string file)
 | 
			
		||||
      {
 | 
			
		||||
      std::ofstream fout(file,std::ios::out|std::ios::in);
 | 
			
		||||
      fout.seekp(0,std::ios::beg);
 | 
			
		||||
      dump_meta_data(field, fout);
 | 
			
		||||
      field.data_start = fout.tellp();
 | 
			
		||||
      return field.data_start;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
      // for the header-reader
 | 
			
		||||
      static inline int readHeader(std::string file,GridBase *grid,  FieldMetaData &field)
 | 
			
		||||
      {
 | 
			
		||||
      uint64_t offset=0;
 | 
			
		||||
      std::map<std::string,std::string> header;
 | 
			
		||||
      std::string line;
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////
 | 
			
		||||
      // read the header
 | 
			
		||||
      //////////////////////////////////////////////////
 | 
			
		||||
      std::ifstream fin(file);
 | 
			
		||||
 | 
			
		||||
      getline(fin,line); // read one line and insist is 
 | 
			
		||||
 | 
			
		||||
      removeWhitespace(line);
 | 
			
		||||
      std::cout << GridLogMessage << "* " << line << std::endl;
 | 
			
		||||
 | 
			
		||||
      assert(line==std::string("BEGIN_HEADER"));
 | 
			
		||||
 | 
			
		||||
      do {
 | 
			
		||||
      getline(fin,line); // read one line
 | 
			
		||||
      std::cout << GridLogMessage << "* "<<line<< std::endl;
 | 
			
		||||
      int eq = line.find("=");
 | 
			
		||||
      if(eq >0) {
 | 
			
		||||
      std::string key=line.substr(0,eq);
 | 
			
		||||
      std::string val=line.substr(eq+1);
 | 
			
		||||
      removeWhitespace(key);
 | 
			
		||||
      removeWhitespace(val);
 | 
			
		||||
      
 | 
			
		||||
      header[key] = val;
 | 
			
		||||
    }
 | 
			
		||||
    } while( line.find("END_HEADER") == std::string::npos );
 | 
			
		||||
 | 
			
		||||
      field.data_start = fin.tellg();
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////
 | 
			
		||||
      // chomp the values
 | 
			
		||||
      //////////////////////////////////////////////////
 | 
			
		||||
      field.hdr_version    = header["HDR_VERSION"];
 | 
			
		||||
      field.data_type      = header["DATATYPE"];
 | 
			
		||||
      field.storage_format = header["STORAGE_FORMAT"];
 | 
			
		||||
  
 | 
			
		||||
      field.dimension[0] = std::stol(header["DIMENSION_1"]);
 | 
			
		||||
      field.dimension[1] = std::stol(header["DIMENSION_2"]);
 | 
			
		||||
      field.dimension[2] = std::stol(header["DIMENSION_3"]);
 | 
			
		||||
      field.dimension[3] = std::stol(header["DIMENSION_4"]);
 | 
			
		||||
 | 
			
		||||
      assert(grid->_ndimension == 4);
 | 
			
		||||
      for(int d=0;d<4;d++){
 | 
			
		||||
      assert(grid->_fdimensions[d]==field.dimension[d]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
      field.link_trace = std::stod(header["LINK_TRACE"]);
 | 
			
		||||
      field.plaquette  = std::stod(header["PLAQUETTE"]);
 | 
			
		||||
 | 
			
		||||
      field.boundary[0] = header["BOUNDARY_1"];
 | 
			
		||||
      field.boundary[1] = header["BOUNDARY_2"];
 | 
			
		||||
      field.boundary[2] = header["BOUNDARY_3"];
 | 
			
		||||
      field.boundary[3] = header["BOUNDARY_4"];
 | 
			
		||||
 | 
			
		||||
      field.checksum = std::stoul(header["CHECKSUM"],0,16);
 | 
			
		||||
      field.ensemble_id      = header["ENSEMBLE_ID"];
 | 
			
		||||
      field.ensemble_label   = header["ENSEMBLE_LABEL"];
 | 
			
		||||
      field.sequence_number  = std::stol(header["SEQUENCE_NUMBER"]);
 | 
			
		||||
      field.creator          = header["CREATOR"];
 | 
			
		||||
      field.creator_hardware = header["CREATOR_HARDWARE"];
 | 
			
		||||
      field.creation_date    = header["CREATION_DATE"];
 | 
			
		||||
      field.archive_date     = header["ARCHIVE_DATE"];
 | 
			
		||||
      field.floating_point   = header["FLOATING_POINT"];
 | 
			
		||||
 | 
			
		||||
      return field.data_start;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Now the meat: the object readers
 | 
			
		||||
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    template<class vsimd>
 | 
			
		||||
    static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
 | 
			
		||||
					 FieldMetaData& header,
 | 
			
		||||
					 std::string file)
 | 
			
		||||
    {
 | 
			
		||||
      typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
 | 
			
		||||
 | 
			
		||||
      GridBase *grid = Umu._grid;
 | 
			
		||||
      uint64_t offset = readHeader(file,Umu._grid,header);
 | 
			
		||||
 | 
			
		||||
      FieldMetaData clone(header);
 | 
			
		||||
 | 
			
		||||
      std::string format(header.floating_point);
 | 
			
		||||
 | 
			
		||||
      int ieee32big = (format == std::string("IEEE32BIG"));
 | 
			
		||||
      int ieee32    = (format == std::string("IEEE32"));
 | 
			
		||||
      int ieee64big = (format == std::string("IEEE64BIG"));
 | 
			
		||||
      int ieee64    = (format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
      // depending on datatype, set up munger;
 | 
			
		||||
      // munger is a function of <floating point, Real, data_type>
 | 
			
		||||
      if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
 | 
			
		||||
	if ( ieee32 || ieee32big ) {
 | 
			
		||||
	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F> 
 | 
			
		||||
	    (Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
	}
 | 
			
		||||
	if ( ieee64 || ieee64big ) {
 | 
			
		||||
	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D> 
 | 
			
		||||
	    (Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
	}
 | 
			
		||||
      } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
 | 
			
		||||
	if ( ieee32 || ieee32big ) {
 | 
			
		||||
	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
 | 
			
		||||
	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
	}
 | 
			
		||||
	if ( ieee64 || ieee64big ) {
 | 
			
		||||
	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
 | 
			
		||||
	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
 | 
			
		||||
	     nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
	}
 | 
			
		||||
      } else {
 | 
			
		||||
	assert(0);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      GaugeStatistics(Umu,clone);
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
 | 
			
		||||
	       <<" header   "<<std::hex<<header.checksum<<std::dec <<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
 | 
			
		||||
	       <<" header    "<<header.plaquette<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
 | 
			
		||||
	       <<" header    "<<header.link_trace<<std::endl;
 | 
			
		||||
 | 
			
		||||
      if ( fabs(clone.plaquette -header.plaquette ) >=  1.0e-5 ) { 
 | 
			
		||||
	std::cout << " Plaquette mismatch "<<std::endl;
 | 
			
		||||
	std::cout << Umu[0]<<std::endl;
 | 
			
		||||
	std::cout << Umu[1]<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
      if ( nersc_csum != header.checksum ) { 
 | 
			
		||||
	std::cerr << " checksum mismatch " << std::endl;
 | 
			
		||||
	std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl;
 | 
			
		||||
	std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl;
 | 
			
		||||
	std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
 | 
			
		||||
	exit(0);
 | 
			
		||||
      }
 | 
			
		||||
      assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
 | 
			
		||||
      assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
 | 
			
		||||
      assert(nersc_csum == header.checksum );
 | 
			
		||||
      
 | 
			
		||||
      std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
      template<class vsimd>
 | 
			
		||||
      static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
 | 
			
		||||
					    std::string file, 
 | 
			
		||||
					    int two_row,
 | 
			
		||||
					    int bits32)
 | 
			
		||||
      {
 | 
			
		||||
	typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
 | 
			
		||||
 | 
			
		||||
	typedef iLorentzColourMatrix<vsimd> vobj;
 | 
			
		||||
	typedef typename vobj::scalar_object sobj;
 | 
			
		||||
 | 
			
		||||
	FieldMetaData header;
 | 
			
		||||
	///////////////////////////////////////////
 | 
			
		||||
	// Following should become arguments
 | 
			
		||||
	///////////////////////////////////////////
 | 
			
		||||
	header.sequence_number = 1;
 | 
			
		||||
	header.ensemble_id     = "UKQCD";
 | 
			
		||||
	header.ensemble_label  = "DWF";
 | 
			
		||||
 | 
			
		||||
	typedef LorentzColourMatrixD fobj3D;
 | 
			
		||||
	typedef LorentzColour2x3D    fobj2D;
 | 
			
		||||
  
 | 
			
		||||
	GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
	GridMetaData(grid,header);
 | 
			
		||||
	assert(header.nd==4);
 | 
			
		||||
	GaugeStatistics(Umu,header);
 | 
			
		||||
	MachineCharacteristics(header);
 | 
			
		||||
 | 
			
		||||
	uint64_t offset;
 | 
			
		||||
 | 
			
		||||
	// Sod it -- always write 3x3 double
 | 
			
		||||
	header.floating_point = std::string("IEEE64BIG");
 | 
			
		||||
	header.data_type      = std::string("4D_SU3_GAUGE_3x3");
 | 
			
		||||
	GaugeSimpleUnmunger<fobj3D,sobj> munge;
 | 
			
		||||
	if ( grid->IsBoss() ) { 
 | 
			
		||||
	  truncate(file);
 | 
			
		||||
	  offset = writeHeader(header,file);
 | 
			
		||||
	}
 | 
			
		||||
	grid->Broadcast(0,(void *)&offset,sizeof(offset));
 | 
			
		||||
 | 
			
		||||
	uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
	BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
 | 
			
		||||
								  nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
	header.checksum = nersc_csum;
 | 
			
		||||
	if ( grid->IsBoss() ) { 
 | 
			
		||||
	  writeHeader(header,file);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
 | 
			
		||||
		 <<std::hex<<header.checksum
 | 
			
		||||
		 <<std::dec<<" plaq "<< header.plaquette <<std::endl;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
      ///////////////////////////////
 | 
			
		||||
      // RNG state
 | 
			
		||||
      ///////////////////////////////
 | 
			
		||||
      static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file)
 | 
			
		||||
      {
 | 
			
		||||
	typedef typename GridParallelRNG::RngStateType RngStateType;
 | 
			
		||||
 | 
			
		||||
	// Following should become arguments
 | 
			
		||||
	FieldMetaData header;
 | 
			
		||||
	header.sequence_number = 1;
 | 
			
		||||
	header.ensemble_id     = "UKQCD";
 | 
			
		||||
	header.ensemble_label  = "DWF";
 | 
			
		||||
 | 
			
		||||
	GridBase *grid = parallel._grid;
 | 
			
		||||
 | 
			
		||||
	GridMetaData(grid,header);
 | 
			
		||||
	assert(header.nd==4);
 | 
			
		||||
	header.link_trace=0.0;
 | 
			
		||||
	header.plaquette=0.0;
 | 
			
		||||
	MachineCharacteristics(header);
 | 
			
		||||
 | 
			
		||||
	uint64_t offset;
 | 
			
		||||
  
 | 
			
		||||
#ifdef RNG_RANLUX
 | 
			
		||||
	header.floating_point = std::string("UINT64");
 | 
			
		||||
	header.data_type      = std::string("RANLUX48");
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef RNG_MT19937
 | 
			
		||||
	header.floating_point = std::string("UINT32");
 | 
			
		||||
	header.data_type      = std::string("MT19937");
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef RNG_SITMO
 | 
			
		||||
	header.floating_point = std::string("UINT64");
 | 
			
		||||
	header.data_type      = std::string("SITMO");
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	if ( grid->IsBoss() ) { 
 | 
			
		||||
	  truncate(file);
 | 
			
		||||
	  offset = writeHeader(header,file);
 | 
			
		||||
	}
 | 
			
		||||
	grid->Broadcast(0,(void *)&offset,sizeof(offset));
 | 
			
		||||
	
 | 
			
		||||
	uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
	BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
	header.checksum = nersc_csum;
 | 
			
		||||
	if ( grid->IsBoss() ) { 
 | 
			
		||||
	  offset = writeHeader(header,file);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage 
 | 
			
		||||
		 <<"Written NERSC RNG STATE "<<file<< " checksum "
 | 
			
		||||
		 <<std::hex<<header.checksum
 | 
			
		||||
		 <<std::dec<<std::endl;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
    
 | 
			
		||||
      static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file)
 | 
			
		||||
      {
 | 
			
		||||
	typedef typename GridParallelRNG::RngStateType RngStateType;
 | 
			
		||||
 | 
			
		||||
	GridBase *grid = parallel._grid;
 | 
			
		||||
 | 
			
		||||
	uint64_t offset = readHeader(file,grid,header);
 | 
			
		||||
 | 
			
		||||
	FieldMetaData clone(header);
 | 
			
		||||
 | 
			
		||||
	std::string format(header.floating_point);
 | 
			
		||||
	std::string data_type(header.data_type);
 | 
			
		||||
 | 
			
		||||
#ifdef RNG_RANLUX
 | 
			
		||||
	assert(format == std::string("UINT64"));
 | 
			
		||||
	assert(data_type == std::string("RANLUX48"));
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef RNG_MT19937
 | 
			
		||||
	assert(format == std::string("UINT32"));
 | 
			
		||||
	assert(data_type == std::string("MT19937"));
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef RNG_SITMO
 | 
			
		||||
	assert(format == std::string("UINT64"));
 | 
			
		||||
	assert(data_type == std::string("SITMO"));
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	// depending on datatype, set up munger;
 | 
			
		||||
	// munger is a function of <floating point, Real, data_type>
 | 
			
		||||
	uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
	BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
	if ( nersc_csum != header.checksum ) { 
 | 
			
		||||
	  std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl;
 | 
			
		||||
	  exit(0);
 | 
			
		||||
	}
 | 
			
		||||
	assert(nersc_csum == header.checksum );
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  }}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,124 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/QCD.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LT_H
 | 
			
		||||
#define GRID_LT_H
 | 
			
		||||
namespace Grid{
 | 
			
		||||
 | 
			
		||||
// First steps in the complete generalization of the Physics part
 | 
			
		||||
// Design not final
 | 
			
		||||
namespace LatticeTheories {
 | 
			
		||||
 | 
			
		||||
template <int Dimensions>
 | 
			
		||||
struct LatticeTheory {
 | 
			
		||||
  static const int Nd = Dimensions;
 | 
			
		||||
  static const int Nds = Dimensions * 2;  // double stored field
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <int Dimensions, int Colours>
 | 
			
		||||
struct LatticeGaugeTheory : public LatticeTheory<Dimensions> {
 | 
			
		||||
  static const int Nds = Dimensions * 2;
 | 
			
		||||
  static const int Nd = Dimensions;
 | 
			
		||||
  static const int Nc = Colours;
 | 
			
		||||
 | 
			
		||||
  template <typename vtype> 
 | 
			
		||||
  using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd>;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <int Dimensions, int Colours, int Spin>
 | 
			
		||||
struct FermionicLatticeGaugeTheory
 | 
			
		||||
    : public LatticeGaugeTheory<Dimensions, Colours> {
 | 
			
		||||
  static const int Nd = Dimensions;
 | 
			
		||||
  static const int Nds = Dimensions * 2;
 | 
			
		||||
  static const int Nc = Colours;
 | 
			
		||||
  static const int Ns = Spin;
 | 
			
		||||
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
 | 
			
		||||
  // These 2 only if Spin is a multiple of 2
 | 
			
		||||
  static const int Nhs = Spin / 2;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
 | 
			
		||||
 | 
			
		||||
  //tests
 | 
			
		||||
  typedef iColourMatrix<Complex> ColourMatrix;
 | 
			
		||||
  typedef iColourMatrix<ComplexF> ColourMatrixF;
 | 
			
		||||
  typedef iColourMatrix<ComplexD> ColourMatrixD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Examples, not complete now.
 | 
			
		||||
struct QCD : public FermionicLatticeGaugeTheory<4, 3, 4> {
 | 
			
		||||
    static const int Xp = 0;
 | 
			
		||||
    static const int Yp = 1;
 | 
			
		||||
    static const int Zp = 2;
 | 
			
		||||
    static const int Tp = 3;
 | 
			
		||||
    static const int Xm = 4;
 | 
			
		||||
    static const int Ym = 5;
 | 
			
		||||
    static const int Zm = 6;
 | 
			
		||||
    static const int Tm = 7;
 | 
			
		||||
 | 
			
		||||
    typedef FermionicLatticeGaugeTheory FLGT;
 | 
			
		||||
 | 
			
		||||
    typedef FLGT::iSpinMatrix<Complex  >          SpinMatrix;
 | 
			
		||||
    typedef FLGT::iSpinMatrix<ComplexF >          SpinMatrixF;
 | 
			
		||||
    typedef FLGT::iSpinMatrix<ComplexD >          SpinMatrixD;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
struct QED : public FermionicLatticeGaugeTheory<4, 1, 4> {//fill
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <int Dimensions>
 | 
			
		||||
struct Scalar : public LatticeTheory<Dimensions> {};
 | 
			
		||||
 | 
			
		||||
};  // LatticeTheories
 | 
			
		||||
 | 
			
		||||
} // Grid
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,56 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/ActionBase.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015-2016
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef ACTION_BASE_H
 | 
			
		||||
#define ACTION_BASE_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
template <class GaugeField >
 | 
			
		||||
class Action 
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  bool is_smeared = false;
 | 
			
		||||
  // Heatbath?
 | 
			
		||||
  virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
 | 
			
		||||
  virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action
 | 
			
		||||
  virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0;        // evaluate the action derivative
 | 
			
		||||
  virtual std::string action_name()    = 0;                             // return the action name
 | 
			
		||||
  virtual std::string LogParameters()  = 0;                             // prints action parameters
 | 
			
		||||
  virtual ~Action(){}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif // ACTION_BASE_H
 | 
			
		||||
@@ -1,92 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/ActionParams.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_QCD_ACTION_PARAMS_H
 | 
			
		||||
#define GRID_QCD_ACTION_PARAMS_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  // These can move into a params header and be given MacroMagic serialisation
 | 
			
		||||
  struct GparityWilsonImplParams {
 | 
			
		||||
    bool overlapCommsCompute;
 | 
			
		||||
    std::vector<int> twists;
 | 
			
		||||
    GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){};
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  struct WilsonImplParams {
 | 
			
		||||
    bool overlapCommsCompute;
 | 
			
		||||
    std::vector<Complex> boundary_phases;
 | 
			
		||||
    WilsonImplParams() : overlapCommsCompute(false) {
 | 
			
		||||
      boundary_phases.resize(Nd, 1.0);
 | 
			
		||||
    };
 | 
			
		||||
    WilsonImplParams(const std::vector<Complex> phi)
 | 
			
		||||
      : boundary_phases(phi), overlapCommsCompute(false) {}
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct StaggeredImplParams {
 | 
			
		||||
    StaggeredImplParams()  {};
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  struct OneFlavourRationalParams : Serializable {
 | 
			
		||||
    GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams, 
 | 
			
		||||
				    RealD, lo, 
 | 
			
		||||
				    RealD, hi, 
 | 
			
		||||
				    int,   MaxIter, 
 | 
			
		||||
				    RealD, tolerance, 
 | 
			
		||||
				    int,   degree, 
 | 
			
		||||
				    int,   precision);
 | 
			
		||||
    
 | 
			
		||||
    // MaxIter and tolerance, vectors??
 | 
			
		||||
    
 | 
			
		||||
    // constructor 
 | 
			
		||||
    OneFlavourRationalParams(	RealD _lo      = 0.0, 
 | 
			
		||||
				RealD _hi      = 1.0, 
 | 
			
		||||
				int _maxit     = 1000,
 | 
			
		||||
				RealD tol      = 1.0e-8, 
 | 
			
		||||
                           	int _degree    = 10,
 | 
			
		||||
				int _precision = 64)
 | 
			
		||||
      : lo(_lo),
 | 
			
		||||
	hi(_hi),
 | 
			
		||||
	MaxIter(_maxit),
 | 
			
		||||
	tolerance(tol),
 | 
			
		||||
	degree(_degree),
 | 
			
		||||
	precision(_precision){};
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,100 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/AbstractEOFAFermion.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_QCD_ABSTRACT_EOFA_FERMION_H
 | 
			
		||||
#define  GRID_QCD_ABSTRACT_EOFA_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/CayleyFermion5D.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  // DJM: Abstract base class for EOFA fermion types.
 | 
			
		||||
  // Defines layout of additional EOFA-specific parameters and operators.
 | 
			
		||||
  // Use to construct EOFA pseudofermion actions that are agnostic to
 | 
			
		||||
  // Shamir / Mobius / etc., and ensure that no one can construct EOFA
 | 
			
		||||
  // pseudofermion action with non-EOFA fermion type.
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  class AbstractEOFAFermion : public CayleyFermion5D<Impl> {
 | 
			
		||||
    public:
 | 
			
		||||
      INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
      // Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm}
 | 
			
		||||
      RealD mq1;
 | 
			
		||||
      RealD mq2;
 | 
			
		||||
      RealD mq3;
 | 
			
		||||
      RealD shift;
 | 
			
		||||
      int pm;
 | 
			
		||||
 | 
			
		||||
      RealD alpha; // Mobius scale
 | 
			
		||||
      RealD k;     // EOFA normalization constant
 | 
			
		||||
 | 
			
		||||
      virtual void Instantiatable(void) = 0;
 | 
			
		||||
 | 
			
		||||
      // EOFA-specific operations
 | 
			
		||||
      // Force user to implement in derived classes
 | 
			
		||||
      virtual void  Omega    (const FermionField& in, FermionField& out, int sign, int dag) = 0;
 | 
			
		||||
      virtual void  Dtilde   (const FermionField& in, FermionField& out) = 0;
 | 
			
		||||
      virtual void  DtildeInv(const FermionField& in, FermionField& out) = 0;
 | 
			
		||||
 | 
			
		||||
      // Implement derivatives in base class:
 | 
			
		||||
      // for EOFA both DWF and Mobius just need d(Dw)/dU
 | 
			
		||||
      virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
 | 
			
		||||
        this->DhopDeriv(mat, U, V, dag);
 | 
			
		||||
      };
 | 
			
		||||
      virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
 | 
			
		||||
        this->DhopDerivOE(mat, U, V, dag);
 | 
			
		||||
      };
 | 
			
		||||
      virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){
 | 
			
		||||
        this->DhopDerivEO(mat, U, V, dag);
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      // Recompute 5D coefficients for different value of shift constant
 | 
			
		||||
      // (needed for heatbath loop over poles)
 | 
			
		||||
      virtual void RefreshShiftCoefficients(RealD new_shift) = 0;
 | 
			
		||||
 | 
			
		||||
      // Constructors
 | 
			
		||||
      AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
 | 
			
		||||
        GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
 | 
			
		||||
        RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm,
 | 
			
		||||
        RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams())
 | 
			
		||||
        : CayleyFermion5D<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,
 | 
			
		||||
          _mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm)
 | 
			
		||||
      {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        this->alpha = _b + _c;
 | 
			
		||||
        this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
 | 
			
		||||
                    ( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
 | 
			
		||||
                    ( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
 | 
			
		||||
      };
 | 
			
		||||
  };
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,438 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid_Eigen_Dense.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    DomainWallEOFAFermion<Impl>::DomainWallEOFAFermion(
 | 
			
		||||
      GaugeField            &_Umu,
 | 
			
		||||
      GridCartesian         &FiveDimGrid,
 | 
			
		||||
      GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
      GridCartesian         &FourDimGrid,
 | 
			
		||||
      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
      RealD _mq1, RealD _mq2, RealD _mq3,
 | 
			
		||||
      RealD _shift, int _pm, RealD _M5, const ImplParams &p) :
 | 
			
		||||
    AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
 | 
			
		||||
        FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
 | 
			
		||||
        _shift, _pm, _M5, 1.0, 0.0, p)
 | 
			
		||||
    {
 | 
			
		||||
        RealD eps = 1.0;
 | 
			
		||||
        Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);
 | 
			
		||||
        assert(zdata->n == this->Ls);
 | 
			
		||||
 | 
			
		||||
        std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl;
 | 
			
		||||
        this->SetCoefficientsTanh(zdata, 1.0, 0.0);
 | 
			
		||||
 | 
			
		||||
        Approx::zolotarev_free(zdata);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /***************************************************************
 | 
			
		||||
     * Additional EOFA operators only called outside the inverter.
 | 
			
		||||
     * Since speed is not essential, simple axpby-style
 | 
			
		||||
     * implementations should be fine.
 | 
			
		||||
     ***************************************************************/
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        Din = zero;
 | 
			
		||||
        if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
 | 
			
		||||
        else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
 | 
			
		||||
        else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }
 | 
			
		||||
        else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // This is just the identity for DWF
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; }
 | 
			
		||||
 | 
			
		||||
    // This is just the identity for DWF
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; }
 | 
			
		||||
 | 
			
		||||
    /*****************************************************************************************************/
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    RealD DomainWallEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        FermionField Din(psi._grid);
 | 
			
		||||
 | 
			
		||||
        this->Meooe5D(psi, Din);
 | 
			
		||||
        this->DW(Din, chi, DaggerNo);
 | 
			
		||||
        axpby(chi, 1.0, 1.0, chi, psi);
 | 
			
		||||
        this->M5D(psi, chi);
 | 
			
		||||
        return(norm2(chi));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    RealD DomainWallEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        FermionField Din(psi._grid);
 | 
			
		||||
 | 
			
		||||
        this->DW(psi, Din, DaggerYes);
 | 
			
		||||
        this->MeooeDag5D(Din, chi);
 | 
			
		||||
        this->M5Ddag(psi, chi);
 | 
			
		||||
        axpby(chi, 1.0, 1.0, chi, psi);
 | 
			
		||||
        return(norm2(chi));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /********************************************************************
 | 
			
		||||
     * Performance critical fermion operators called inside the inverter
 | 
			
		||||
     ********************************************************************/
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int   Ls    = this->Ls;
 | 
			
		||||
        int   pm    = this->pm;
 | 
			
		||||
        RealD shift = this->shift;
 | 
			
		||||
        RealD mq1   = this->mq1;
 | 
			
		||||
        RealD mq2   = this->mq2;
 | 
			
		||||
        RealD mq3   = this->mq3;
 | 
			
		||||
 | 
			
		||||
        // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
 | 
			
		||||
        Coeff_t shiftp(0.0), shiftm(0.0);
 | 
			
		||||
        if(shift != 0.0){
 | 
			
		||||
          if(pm == 1){ shiftp = shift*(mq3-mq2); }
 | 
			
		||||
          else{ shiftm = -shift*(mq3-mq2); }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> diag(Ls,1.0);
 | 
			
		||||
        std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm;
 | 
			
		||||
        std::vector<Coeff_t> lower(Ls,-1.0); lower[0]    = mq1 + shiftp;
 | 
			
		||||
 | 
			
		||||
        #if(0)
 | 
			
		||||
            std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl;
 | 
			
		||||
            for(int i=0; i<diag.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            for(int i=0; i<upper.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            for(int i=0; i<lower.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        this->M5D(psi, chi, chi, lower, diag, upper);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int   Ls    = this->Ls;
 | 
			
		||||
        int   pm    = this->pm;
 | 
			
		||||
        RealD shift = this->shift;
 | 
			
		||||
        RealD mq1   = this->mq1;
 | 
			
		||||
        RealD mq2   = this->mq2;
 | 
			
		||||
        RealD mq3   = this->mq3;
 | 
			
		||||
 | 
			
		||||
        // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} )
 | 
			
		||||
        Coeff_t shiftp(0.0), shiftm(0.0);
 | 
			
		||||
        if(shift != 0.0){
 | 
			
		||||
          if(pm == 1){ shiftp = shift*(mq3-mq2); }
 | 
			
		||||
          else{ shiftm = -shift*(mq3-mq2); }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> diag(Ls,1.0);
 | 
			
		||||
        std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp;
 | 
			
		||||
        std::vector<Coeff_t> lower(Ls,-1.0); lower[0]    = mq1 + shiftm;
 | 
			
		||||
 | 
			
		||||
        #if(0)
 | 
			
		||||
            std::cout << GridLogMessage << "DomainWallEOFAFermion::M5Ddag(FF&,FF&):" << std::endl;
 | 
			
		||||
            for(int i=0; i<diag.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "diag[" << i << "] =" << diag[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            for(int i=0; i<upper.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "upper[" << i << "] =" << upper[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            for(int i=0; i<lower.size(); ++i){
 | 
			
		||||
                std::cout << GridLogMessage << "lower[" << i << "] =" << lower[i] << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        this->M5Ddag(psi, chi, chi, lower, diag, upper);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // half checkerboard operations
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> diag = this->bee;
 | 
			
		||||
        std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
        std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
          upper[s] = -this->cee[s];
 | 
			
		||||
          lower[s] = -this->cee[s];
 | 
			
		||||
        }
 | 
			
		||||
        upper[Ls-1] = this->dm;
 | 
			
		||||
        lower[0]    = this->dp;
 | 
			
		||||
 | 
			
		||||
        this->M5D(psi, psi, chi, lower, diag, upper);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> diag = this->bee;
 | 
			
		||||
        std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
        std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
          upper[s] = -this->cee[s];
 | 
			
		||||
          lower[s] = -this->cee[s];
 | 
			
		||||
        }
 | 
			
		||||
        upper[Ls-1] = this->dp;
 | 
			
		||||
        lower[0]    = this->dm;
 | 
			
		||||
 | 
			
		||||
        this->M5Ddag(psi, psi, chi, lower, diag, upper);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /****************************************************************************************/
 | 
			
		||||
 | 
			
		||||
    //Zolo
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c)
 | 
			
		||||
    {
 | 
			
		||||
        int   Ls    = this->Ls;
 | 
			
		||||
        int   pm    = this->pm;
 | 
			
		||||
        RealD mq1   = this->mq1;
 | 
			
		||||
        RealD mq2   = this->mq2;
 | 
			
		||||
        RealD mq3   = this->mq3;
 | 
			
		||||
        RealD shift = this->shift;
 | 
			
		||||
 | 
			
		||||
        ////////////////////////////////////////////////////////
 | 
			
		||||
        // Constants for the preconditioned matrix Cayley form
 | 
			
		||||
        ////////////////////////////////////////////////////////
 | 
			
		||||
        this->bs.resize(Ls);
 | 
			
		||||
        this->cs.resize(Ls);
 | 
			
		||||
        this->aee.resize(Ls);
 | 
			
		||||
        this->aeo.resize(Ls);
 | 
			
		||||
        this->bee.resize(Ls);
 | 
			
		||||
        this->beo.resize(Ls);
 | 
			
		||||
        this->cee.resize(Ls);
 | 
			
		||||
        this->ceo.resize(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int i=0; i<Ls; ++i){
 | 
			
		||||
          this->bee[i] = 4.0 - this->M5 + 1.0;
 | 
			
		||||
          this->cee[i] = 1.0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int i=0; i<Ls; ++i){
 | 
			
		||||
          this->aee[i] = this->cee[i];
 | 
			
		||||
          this->bs[i] = this->beo[i] = 1.0;
 | 
			
		||||
          this->cs[i] = this->ceo[i] = 0.0;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        //////////////////////////////////////////
 | 
			
		||||
        // EOFA shift terms
 | 
			
		||||
        //////////////////////////////////////////
 | 
			
		||||
        if(pm == 1){
 | 
			
		||||
          this->dp = mq1*this->cee[0] + shift*(mq3-mq2);
 | 
			
		||||
          this->dm = mq1*this->cee[Ls-1];
 | 
			
		||||
        } else if(this->pm == -1) {
 | 
			
		||||
          this->dp = mq1*this->cee[0];
 | 
			
		||||
          this->dm = mq1*this->cee[Ls-1] - shift*(mq3-mq2);
 | 
			
		||||
        } else {
 | 
			
		||||
          this->dp = mq1*this->cee[0];
 | 
			
		||||
          this->dm = mq1*this->cee[Ls-1];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        //////////////////////////////////////////
 | 
			
		||||
        // LDU decomposition of eeoo
 | 
			
		||||
        //////////////////////////////////////////
 | 
			
		||||
        this->dee.resize(Ls+1);
 | 
			
		||||
        this->lee.resize(Ls);
 | 
			
		||||
        this->leem.resize(Ls);
 | 
			
		||||
        this->uee.resize(Ls);
 | 
			
		||||
        this->ueem.resize(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int i=0; i<Ls; ++i){
 | 
			
		||||
 | 
			
		||||
          if(i < Ls-1){
 | 
			
		||||
 | 
			
		||||
            this->lee[i] = -this->cee[i+1]/this->bee[i]; // sub-diag entry on the ith column
 | 
			
		||||
 | 
			
		||||
            this->leem[i] = this->dm/this->bee[i];
 | 
			
		||||
            for(int j=0; j<i; j++){ this->leem[i] *= this->aee[j]/this->bee[j]; }
 | 
			
		||||
 | 
			
		||||
            this->dee[i] = this->bee[i];
 | 
			
		||||
 | 
			
		||||
            this->uee[i] = -this->aee[i]/this->bee[i];   // up-diag entry on the ith row
 | 
			
		||||
 | 
			
		||||
            this->ueem[i] = this->dp / this->bee[0];
 | 
			
		||||
            for(int j=1; j<=i; j++){ this->ueem[i] *= this->cee[j]/this->bee[j]; }
 | 
			
		||||
 | 
			
		||||
          } else {
 | 
			
		||||
 | 
			
		||||
            this->lee[i]  = 0.0;
 | 
			
		||||
            this->leem[i] = 0.0;
 | 
			
		||||
            this->uee[i]  = 0.0;
 | 
			
		||||
            this->ueem[i] = 0.0;
 | 
			
		||||
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        {
 | 
			
		||||
          Coeff_t delta_d = 1.0 / this->bee[0];
 | 
			
		||||
          for(int j=1; j<Ls-1; j++){ delta_d *= this->cee[j] / this->bee[j]; }
 | 
			
		||||
          this->dee[Ls-1] = this->bee[Ls-1] + this->cee[0] * this->dm * delta_d;
 | 
			
		||||
          this->dee[Ls] = this->bee[Ls-1] + this->cee[Ls-1] * this->dp * delta_d;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        int inv = 1;
 | 
			
		||||
        this->MooeeInternalCompute(0, inv, this->MatpInv, this->MatmInv);
 | 
			
		||||
        this->MooeeInternalCompute(1, inv, this->MatpInvDag, this->MatmInvDag);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Recompute Cayley-form coefficients for different shift
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
 | 
			
		||||
    {
 | 
			
		||||
        this->shift = new_shift;
 | 
			
		||||
        Approx::zolotarev_data *zdata = Approx::higham(1.0, this->Ls);
 | 
			
		||||
        this->SetCoefficientsTanh(zdata, 1.0, 0.0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
 | 
			
		||||
        Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        GridBase* grid = this->FermionRedBlackGrid();
 | 
			
		||||
        int LLs = grid->_rdimensions[0];
 | 
			
		||||
 | 
			
		||||
        if(LLs == Ls){ return; } // Not vectorised in 5th direction
 | 
			
		||||
 | 
			
		||||
        Eigen::MatrixXcd Pplus  = Eigen::MatrixXcd::Zero(Ls,Ls);
 | 
			
		||||
        Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
            Pplus(s,s)  = this->bee[s];
 | 
			
		||||
            Pminus(s,s) = this->bee[s];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            Pminus(s,s+1) = -this->cee[s];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            Pplus(s+1,s) = -this->cee[s+1];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Pplus (0,Ls-1) = this->dp;
 | 
			
		||||
        Pminus(Ls-1,0) = this->dm;
 | 
			
		||||
 | 
			
		||||
        Eigen::MatrixXcd PplusMat ;
 | 
			
		||||
        Eigen::MatrixXcd PminusMat;
 | 
			
		||||
 | 
			
		||||
        #if(0)
 | 
			
		||||
            std::cout << GridLogMessage << "Pplus:" << std::endl;
 | 
			
		||||
            for(int s=0; s<Ls; ++s){
 | 
			
		||||
                for(int ss=0; ss<Ls; ++ss){
 | 
			
		||||
                    std::cout << Pplus(s,ss) << "\t";
 | 
			
		||||
                }
 | 
			
		||||
                std::cout << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
            std::cout << GridLogMessage << "Pminus:" << std::endl;
 | 
			
		||||
            for(int s=0; s<Ls; ++s){
 | 
			
		||||
                for(int ss=0; ss<Ls; ++ss){
 | 
			
		||||
                    std::cout << Pminus(s,ss) << "\t";
 | 
			
		||||
                }
 | 
			
		||||
                std::cout << std::endl;
 | 
			
		||||
            }
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        if(inv) {
 | 
			
		||||
            PplusMat  = Pplus.inverse();
 | 
			
		||||
            PminusMat = Pminus.inverse();
 | 
			
		||||
        } else {
 | 
			
		||||
            PplusMat  = Pplus;
 | 
			
		||||
            PminusMat = Pminus;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(dag){
 | 
			
		||||
            PplusMat.adjointInPlace();
 | 
			
		||||
            PminusMat.adjointInPlace();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        typedef typename SiteHalfSpinor::scalar_type scalar_type;
 | 
			
		||||
        const int Nsimd = Simd::Nsimd();
 | 
			
		||||
        Matp.resize(Ls*LLs);
 | 
			
		||||
        Matm.resize(Ls*LLs);
 | 
			
		||||
 | 
			
		||||
        for(int s2=0; s2<Ls; s2++){
 | 
			
		||||
        for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
            int istride = LLs;
 | 
			
		||||
            int ostride = 1;
 | 
			
		||||
            Simd Vp;
 | 
			
		||||
            Simd Vm;
 | 
			
		||||
            scalar_type *sp = (scalar_type*) &Vp;
 | 
			
		||||
            scalar_type *sm = (scalar_type*) &Vm;
 | 
			
		||||
            for(int l=0; l<Nsimd; l++){
 | 
			
		||||
                if(switcheroo<Coeff_t>::iscomplex()) {
 | 
			
		||||
                    sp[l] = PplusMat (l*istride+s1*ostride,s2);
 | 
			
		||||
                    sm[l] = PminusMat(l*istride+s1*ostride,s2);
 | 
			
		||||
                } else {
 | 
			
		||||
                    // if real
 | 
			
		||||
                    scalar_type tmp;
 | 
			
		||||
                    tmp = PplusMat (l*istride+s1*ostride,s2);
 | 
			
		||||
                    sp[l] = scalar_type(tmp.real(),tmp.real());
 | 
			
		||||
                    tmp = PminusMat(l*istride+s1*ostride,s2);
 | 
			
		||||
                    sm[l] = scalar_type(tmp.real(),tmp.real());
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            Matp[LLs*s2+s1] = Vp;
 | 
			
		||||
            Matm[LLs*s2+s1] = Vm;
 | 
			
		||||
        }}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    FermOpTemplateInstantiate(DomainWallEOFAFermion);
 | 
			
		||||
    GparityFermOpTemplateInstantiate(DomainWallEOFAFermion);
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,115 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
 | 
			
		||||
#define  GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  class DomainWallEOFAFermion : public AbstractEOFAFermion<Impl>
 | 
			
		||||
  {
 | 
			
		||||
    public:
 | 
			
		||||
      INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
      // Modified (0,Ls-1) and (Ls-1,0) elements of Mooee
 | 
			
		||||
      // for red-black preconditioned Shamir EOFA
 | 
			
		||||
      Coeff_t dm;
 | 
			
		||||
      Coeff_t dp;
 | 
			
		||||
 | 
			
		||||
      virtual void Instantiatable(void) {};
 | 
			
		||||
 | 
			
		||||
      // EOFA-specific operations
 | 
			
		||||
      virtual void  Omega      (const FermionField& in, FermionField& out, int sign, int dag);
 | 
			
		||||
      virtual void  Dtilde     (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  DtildeInv  (const FermionField& in, FermionField& out);
 | 
			
		||||
 | 
			
		||||
      // override multiply
 | 
			
		||||
      virtual RealD M          (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual RealD Mdag       (const FermionField& in, FermionField& out);
 | 
			
		||||
 | 
			
		||||
      // half checkerboard operations
 | 
			
		||||
      virtual void  Mooee      (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  MooeeDag   (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  MooeeInv   (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  MooeeInvDag(const FermionField& in, FermionField& out);
 | 
			
		||||
 | 
			
		||||
      virtual void   M5D       (const FermionField& psi, FermionField& chi);
 | 
			
		||||
      virtual void   M5Ddag    (const FermionField& psi, FermionField& chi);
 | 
			
		||||
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // Instantiate different versions depending on Impl
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
 | 
			
		||||
        std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
 | 
			
		||||
 | 
			
		||||
      void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
 | 
			
		||||
        std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
 | 
			
		||||
 | 
			
		||||
      void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
      void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
 | 
			
		||||
 | 
			
		||||
      void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
 | 
			
		||||
        Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
 | 
			
		||||
 | 
			
		||||
      void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
 | 
			
		||||
        Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
 | 
			
		||||
 | 
			
		||||
      virtual void RefreshShiftCoefficients(RealD new_shift);
 | 
			
		||||
 | 
			
		||||
      // Constructors
 | 
			
		||||
      DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
 | 
			
		||||
        GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
 | 
			
		||||
        RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
 | 
			
		||||
        RealD _M5, const ImplParams& p=ImplParams());
 | 
			
		||||
 | 
			
		||||
    protected:
 | 
			
		||||
      void SetCoefficientsInternal(RealD zolo_hi, std::vector<Coeff_t>& gamma, RealD b, RealD c);
 | 
			
		||||
  };
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#define INSTANTIATE_DPERP_DWF_EOFA(A)\
 | 
			
		||||
template void DomainWallEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
 | 
			
		||||
  std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
 | 
			
		||||
template void DomainWallEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
 | 
			
		||||
  std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
 | 
			
		||||
template void DomainWallEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
 | 
			
		||||
template void DomainWallEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi);
 | 
			
		||||
 | 
			
		||||
#undef  DOMAIN_WALL_EOFA_DPERP_DENSE
 | 
			
		||||
#define DOMAIN_WALL_EOFA_DPERP_CACHE
 | 
			
		||||
#undef  DOMAIN_WALL_EOFA_DPERP_LINALG
 | 
			
		||||
#define DOMAIN_WALL_EOFA_DPERP_VEC
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,248 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
 | 
			
		||||
    // Pminus fowards
 | 
			
		||||
    // Pplus  backwards..
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
        assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
        // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
        this->M5Dcalls++;
 | 
			
		||||
        this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
 | 
			
		||||
            for(int s=0; s<Ls; s++){
 | 
			
		||||
                auto tmp = psi._odata[0];
 | 
			
		||||
                if(s==0) {
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                } else if(s==(Ls-1)) {
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+0]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                } else {
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->M5Dtime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
        chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
        this->M5Dcalls++;
 | 
			
		||||
        this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
 | 
			
		||||
            auto tmp = psi._odata[0];
 | 
			
		||||
            for(int s=0; s<Ls; s++){
 | 
			
		||||
                if(s==0) {
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                } else if(s==(Ls-1)) {
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+0]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                } else {
 | 
			
		||||
                    spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
                    chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
                    spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
                    chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->M5Dtime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvCalls++;
 | 
			
		||||
        this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
 | 
			
		||||
 | 
			
		||||
            auto tmp1 = psi._odata[0];
 | 
			
		||||
            auto tmp2 = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
            // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls  = 12*Ls * (9) = 108*Ls flops
 | 
			
		||||
            // Apply (L^{\prime})^{-1}
 | 
			
		||||
            chi[ss] = psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
            for(int s=1; s<Ls; s++){
 | 
			
		||||
                spProj5p(tmp1, chi[ss+s-1]);
 | 
			
		||||
                chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // L_m^{-1}
 | 
			
		||||
            for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
                spProj5m(tmp1, chi[ss+s]);
 | 
			
		||||
                chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // U_m^{-1} D^{-1}
 | 
			
		||||
            for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
 | 
			
		||||
                spProj5p(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
                chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
            spProj5m(tmp2, chi[ss+Ls-1]);
 | 
			
		||||
            chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2;
 | 
			
		||||
 | 
			
		||||
            // Apply U^{-1}
 | 
			
		||||
            for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
                spProj5m(tmp1, chi[ss+s+1]);
 | 
			
		||||
                chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvTime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        assert(psi.checkerboard == psi.checkerboard);
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        std::vector<Coeff_t> ueec(Ls);
 | 
			
		||||
        std::vector<Coeff_t> deec(Ls+1);
 | 
			
		||||
        std::vector<Coeff_t> leec(Ls);
 | 
			
		||||
        std::vector<Coeff_t> ueemc(Ls);
 | 
			
		||||
        std::vector<Coeff_t> leemc(Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<ueec.size(); s++){
 | 
			
		||||
            ueec[s]  = conjugate(this->uee[s]);
 | 
			
		||||
            deec[s]  = conjugate(this->dee[s]);
 | 
			
		||||
            leec[s]  = conjugate(this->lee[s]);
 | 
			
		||||
            ueemc[s] = conjugate(this->ueem[s]);
 | 
			
		||||
            leemc[s] = conjugate(this->leem[s]);
 | 
			
		||||
        }
 | 
			
		||||
        deec[Ls] = conjugate(this->dee[Ls]);
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvCalls++;
 | 
			
		||||
        this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){ // adds Ls
 | 
			
		||||
 | 
			
		||||
            auto tmp1 = psi._odata[0];
 | 
			
		||||
            auto tmp2 = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
            // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
            chi[ss] = psi[ss];
 | 
			
		||||
            for(int s=1; s<Ls; s++){
 | 
			
		||||
                spProj5m(tmp1, chi[ss+s-1]);
 | 
			
		||||
                chi[ss+s] = psi[ss+s] - ueec[s-1]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // U_m^{-\dagger}
 | 
			
		||||
            for(int s=0; s<Ls-1; s++){
 | 
			
		||||
                spProj5p(tmp1, chi[ss+s]);
 | 
			
		||||
                chi[ss+Ls-1] = chi[ss+Ls-1] - ueemc[s]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
            for(int s=0; s<Ls-1; s++){
 | 
			
		||||
                spProj5m(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
                chi[ss+s] = (1.0/deec[s])*chi[ss+s] - (leemc[s]/deec[Ls-1])*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
            spProj5p(tmp2, chi[ss+Ls-1]);
 | 
			
		||||
            chi[ss+Ls-1] = (1.0/deec[Ls-1])*tmp1 + (1.0/deec[Ls])*tmp2;
 | 
			
		||||
 | 
			
		||||
            // Apply L^{-dagger}
 | 
			
		||||
            for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
                spProj5p(tmp1, chi[ss+s+1]);
 | 
			
		||||
                chi[ss+s] = chi[ss+s] - leec[s]*tmp1;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvTime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef DOMAIN_WALL_EOFA_DPERP_CACHE
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,159 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid_Eigen_Dense.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    * Dense matrix versions of routines
 | 
			
		||||
    */
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
        int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        assert(Ls==LLs);
 | 
			
		||||
 | 
			
		||||
        Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
        Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
 | 
			
		||||
        for(int s=0;s<Ls;s++){
 | 
			
		||||
            Pplus(s,s)  = this->bee[s];
 | 
			
		||||
            Pminus(s,s) = this->bee[s];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            Pminus(s,s+1) = -this->cee[s];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            Pplus(s+1,s) = -this->cee[s+1];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Pplus (0,Ls-1) = this->dp;
 | 
			
		||||
        Pminus(Ls-1,0) = this->dm;
 | 
			
		||||
 | 
			
		||||
        Eigen::MatrixXd PplusMat ;
 | 
			
		||||
        Eigen::MatrixXd PminusMat;
 | 
			
		||||
 | 
			
		||||
        if(inv) {
 | 
			
		||||
            PplusMat  = Pplus.inverse();
 | 
			
		||||
            PminusMat = Pminus.inverse();
 | 
			
		||||
        } else {
 | 
			
		||||
            PplusMat  = Pplus;
 | 
			
		||||
            PminusMat = Pminus;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(dag){
 | 
			
		||||
            PplusMat.adjointInPlace();
 | 
			
		||||
            PminusMat.adjointInPlace();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // For the non-vectorised s-direction this is simple
 | 
			
		||||
 | 
			
		||||
        for(auto site=0; site<vol; site++){
 | 
			
		||||
 | 
			
		||||
            SiteSpinor     SiteChi;
 | 
			
		||||
            SiteHalfSpinor SitePplus;
 | 
			
		||||
            SiteHalfSpinor SitePminus;
 | 
			
		||||
 | 
			
		||||
            for(int s1=0; s1<Ls; s1++){
 | 
			
		||||
                SiteChi = zero;
 | 
			
		||||
                for(int s2=0; s2<Ls; s2++){
 | 
			
		||||
                    int lex2 = s2 + Ls*site;
 | 
			
		||||
                    if(PplusMat(s1,s2) != 0.0){
 | 
			
		||||
                        spProj5p(SitePplus,psi[lex2]);
 | 
			
		||||
                        accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
 | 
			
		||||
                    }
 | 
			
		||||
                    if(PminusMat(s1,s2) != 0.0){
 | 
			
		||||
                        spProj5m(SitePminus, psi[lex2]);
 | 
			
		||||
                        accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                chi[s1+Ls*site] = SiteChi*0.5;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef DOMAIN_WALL_EOFA_DPERP_DENSE
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
        template void DomainWallEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
        template void DomainWallEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,168 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
    // Pminus fowards
 | 
			
		||||
    // Pplus  backwards
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        Coeff_t one(1.0);
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
            if(s==0) {
 | 
			
		||||
              axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
              axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
            } else if (s==(Ls-1)) {
 | 
			
		||||
              axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
              axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
            } else {
 | 
			
		||||
              axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
              axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        Coeff_t one(1.0);
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        for(int s=0; s<Ls; s++){
 | 
			
		||||
            if(s==0) {
 | 
			
		||||
              axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
              axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
            } else if (s==(Ls-1)) {
 | 
			
		||||
              axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
              axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
            } else {
 | 
			
		||||
              axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
              axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        Coeff_t one(1.0);
 | 
			
		||||
        Coeff_t czero(0.0);
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
        // Apply (L^{\prime})^{-1}
 | 
			
		||||
        axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
        for(int s=1; s<Ls; s++){
 | 
			
		||||
            axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // L_m^{-1}
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
            axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // U_m^{-1} D^{-1}
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1);
 | 
			
		||||
        }
 | 
			
		||||
        axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1);
 | 
			
		||||
        axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
        // Apply U^{-1}
 | 
			
		||||
        for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
            axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1);  // chi[Ls]
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        Coeff_t one(1.0);
 | 
			
		||||
        Coeff_t czero(0.0);
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
        FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
        // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
        axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
        for(int s=1; s<Ls; s++){
 | 
			
		||||
            axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // U_m^{-\dagger}
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
        for(int s=0; s<Ls-1; s++){
 | 
			
		||||
            axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
 | 
			
		||||
        }
 | 
			
		||||
        axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1);
 | 
			
		||||
        axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
        // Apply L^{-dagger}
 | 
			
		||||
        for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
            axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1);  // chi[Ls]
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef DOMAIN_WALL_EOFA_DPERP_LINALG
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,605 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    * Dense matrix versions of routines
 | 
			
		||||
    */
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
        this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        int Ls  = this->Ls;
 | 
			
		||||
        int LLs = grid->_rdimensions[0];
 | 
			
		||||
        const int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
        Vector<iSinglet<Simd> > u(LLs);
 | 
			
		||||
        Vector<iSinglet<Simd> > l(LLs);
 | 
			
		||||
        Vector<iSinglet<Simd> > d(LLs);
 | 
			
		||||
 | 
			
		||||
        assert(Ls/LLs == nsimd);
 | 
			
		||||
        assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        // just directly address via type pun
 | 
			
		||||
        typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
        scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
        scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
        scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
 | 
			
		||||
        for(int o=0;o<LLs;o++){ // outer
 | 
			
		||||
        for(int i=0;i<nsimd;i++){ //inner
 | 
			
		||||
            int s  = o + i*LLs;
 | 
			
		||||
            int ss = o*nsimd + i;
 | 
			
		||||
            u_p[ss] = upper[s];
 | 
			
		||||
            l_p[ss] = lower[s];
 | 
			
		||||
            d_p[ss] = diag[s];
 | 
			
		||||
        }}
 | 
			
		||||
 | 
			
		||||
        this->M5Dcalls++;
 | 
			
		||||
        this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
        assert(Nc == 3);
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
            #if 0
 | 
			
		||||
 | 
			
		||||
                alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
                alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
                alignas(64) SiteSpinor fp;
 | 
			
		||||
                alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
                for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
                    int vp = (v+1)%LLs;
 | 
			
		||||
                    int vm = (v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
                    spProj5m(hp, psi[ss+vp]);
 | 
			
		||||
                    spProj5p(hm, psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
                    if (vp <= v){ rotate(hp, hp, 1); }
 | 
			
		||||
                    if (vm >= v){ rotate(hm, hm, nsimd-1); }
 | 
			
		||||
 | 
			
		||||
                    hp = 0.5*hp;
 | 
			
		||||
                    hm = 0.5*hm;
 | 
			
		||||
 | 
			
		||||
                    spRecon5m(fp, hp);
 | 
			
		||||
                    spRecon5p(fm, hm);
 | 
			
		||||
 | 
			
		||||
                    chi[ss+v] = d[v]*phi[ss+v];
 | 
			
		||||
                    chi[ss+v] = chi[ss+v] + u[v]*fp;
 | 
			
		||||
                    chi[ss+v] = chi[ss+v] + l[v]*fm;
 | 
			
		||||
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
            #else
 | 
			
		||||
 | 
			
		||||
                for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
                    vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
                    int vp = (v==LLs-1) ? 0     : v+1;
 | 
			
		||||
                    int vm = (v==0)     ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
                    Simd hp_00 = psi[ss+vp]()(2)(0);
 | 
			
		||||
                    Simd hp_01 = psi[ss+vp]()(2)(1);
 | 
			
		||||
                    Simd hp_02 = psi[ss+vp]()(2)(2);
 | 
			
		||||
                    Simd hp_10 = psi[ss+vp]()(3)(0);
 | 
			
		||||
                    Simd hp_11 = psi[ss+vp]()(3)(1);
 | 
			
		||||
                    Simd hp_12 = psi[ss+vp]()(3)(2);
 | 
			
		||||
 | 
			
		||||
                    Simd hm_00 = psi[ss+vm]()(0)(0);
 | 
			
		||||
                    Simd hm_01 = psi[ss+vm]()(0)(1);
 | 
			
		||||
                    Simd hm_02 = psi[ss+vm]()(0)(2);
 | 
			
		||||
                    Simd hm_10 = psi[ss+vm]()(1)(0);
 | 
			
		||||
                    Simd hm_11 = psi[ss+vm]()(1)(1);
 | 
			
		||||
                    Simd hm_12 = psi[ss+vm]()(1)(2);
 | 
			
		||||
 | 
			
		||||
                    if(vp <= v){
 | 
			
		||||
                        hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
                        hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
                        hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
                        hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
                        hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
                        hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    if(vm >= v){
 | 
			
		||||
                        hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
                        hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
                        hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
                        hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
                        hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
                        hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Can force these to real arithmetic and save 2x.
 | 
			
		||||
                    Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
 | 
			
		||||
                    Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
 | 
			
		||||
                    Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
 | 
			
		||||
                    Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
 | 
			
		||||
                    Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
 | 
			
		||||
                    Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
 | 
			
		||||
                    Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
                    Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
                    Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
                    Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
                    Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
                    Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
 | 
			
		||||
                    vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
                    vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
                    vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
                    vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
                    vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
                    vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
                    vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
                    vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
                    vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
                    vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
                    vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
                    vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
            #endif
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->M5Dtime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
        FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
    {
 | 
			
		||||
        GridBase* grid = psi._grid;
 | 
			
		||||
        int Ls  = this->Ls;
 | 
			
		||||
        int LLs = grid->_rdimensions[0];
 | 
			
		||||
        int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
        Vector<iSinglet<Simd> > u(LLs);
 | 
			
		||||
        Vector<iSinglet<Simd> > l(LLs);
 | 
			
		||||
        Vector<iSinglet<Simd> > d(LLs);
 | 
			
		||||
 | 
			
		||||
        assert(Ls/LLs == nsimd);
 | 
			
		||||
        assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        // just directly address via type pun
 | 
			
		||||
        typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
        scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
        scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
        scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
 | 
			
		||||
        for(int o=0; o<LLs; o++){ // outer
 | 
			
		||||
        for(int i=0; i<nsimd; i++){ //inner
 | 
			
		||||
            int s  = o + i*LLs;
 | 
			
		||||
            int ss = o*nsimd + i;
 | 
			
		||||
            u_p[ss] = upper[s];
 | 
			
		||||
            l_p[ss] = lower[s];
 | 
			
		||||
            d_p[ss] = diag[s];
 | 
			
		||||
        }}
 | 
			
		||||
 | 
			
		||||
        this->M5Dcalls++;
 | 
			
		||||
        this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
        parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
        #if 0
 | 
			
		||||
 | 
			
		||||
            alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
            alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
            alignas(64) SiteSpinor fp;
 | 
			
		||||
            alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
            for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
                int vp = (v+1)%LLs;
 | 
			
		||||
                int vm = (v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
                spProj5p(hp, psi[ss+vp]);
 | 
			
		||||
                spProj5m(hm, psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
                if(vp <= v){ rotate(hp, hp, 1); }
 | 
			
		||||
                if(vm >= v){ rotate(hm, hm, nsimd-1); }
 | 
			
		||||
 | 
			
		||||
                hp = hp*0.5;
 | 
			
		||||
                hm = hm*0.5;
 | 
			
		||||
                spRecon5p(fp, hp);
 | 
			
		||||
                spRecon5m(fm, hm);
 | 
			
		||||
 | 
			
		||||
                chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
 | 
			
		||||
                chi[ss+v] = chi[ss+v]     +l[v]*fm;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        #else
 | 
			
		||||
 | 
			
		||||
            for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
                vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
                int vp = (v == LLs-1) ? 0     : v+1;
 | 
			
		||||
                int vm = (v == 0    ) ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
                Simd hp_00 = psi[ss+vp]()(0)(0);
 | 
			
		||||
                Simd hp_01 = psi[ss+vp]()(0)(1);
 | 
			
		||||
                Simd hp_02 = psi[ss+vp]()(0)(2);
 | 
			
		||||
                Simd hp_10 = psi[ss+vp]()(1)(0);
 | 
			
		||||
                Simd hp_11 = psi[ss+vp]()(1)(1);
 | 
			
		||||
                Simd hp_12 = psi[ss+vp]()(1)(2);
 | 
			
		||||
 | 
			
		||||
                Simd hm_00 = psi[ss+vm]()(2)(0);
 | 
			
		||||
                Simd hm_01 = psi[ss+vm]()(2)(1);
 | 
			
		||||
                Simd hm_02 = psi[ss+vm]()(2)(2);
 | 
			
		||||
                Simd hm_10 = psi[ss+vm]()(3)(0);
 | 
			
		||||
                Simd hm_11 = psi[ss+vm]()(3)(1);
 | 
			
		||||
                Simd hm_12 = psi[ss+vm]()(3)(2);
 | 
			
		||||
 | 
			
		||||
                if (vp <= v){
 | 
			
		||||
                    hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
                    hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
                    hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
                    hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
                    hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
                    hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if(vm >= v){
 | 
			
		||||
                    hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
                    hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
                    hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
                    hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
                    hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
                    hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
                Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
                Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
                Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
                Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
                Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
                Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
 | 
			
		||||
                Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
 | 
			
		||||
                Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
 | 
			
		||||
                Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
 | 
			
		||||
                Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
 | 
			
		||||
                Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
 | 
			
		||||
 | 
			
		||||
                vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
                vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
                vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
                vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
                vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
                vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
                vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
                vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
                vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
                vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
                vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
                vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
            }
 | 
			
		||||
        #endif
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->M5Dtime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef AVX512
 | 
			
		||||
        #include<simd/Intel512common.h>
 | 
			
		||||
        #include<simd/Intel512avx.h>
 | 
			
		||||
        #include<simd/Intel512single.h>
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
 | 
			
		||||
        int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
    {
 | 
			
		||||
        #ifndef AVX512
 | 
			
		||||
        {
 | 
			
		||||
            SiteHalfSpinor BcastP;
 | 
			
		||||
            SiteHalfSpinor BcastM;
 | 
			
		||||
            SiteHalfSpinor SiteChiP;
 | 
			
		||||
            SiteHalfSpinor SiteChiM;
 | 
			
		||||
 | 
			
		||||
            // Ls*Ls * 2 * 12 * vol flops
 | 
			
		||||
            for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
 | 
			
		||||
                for(int s2=0; s2<LLs; s2++){
 | 
			
		||||
                for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
 | 
			
		||||
 | 
			
		||||
                    int s = s2 + l*LLs;
 | 
			
		||||
                    int lex = s2 + LLs*site;
 | 
			
		||||
 | 
			
		||||
                    if( s2==0 && l==0 ){
 | 
			
		||||
                        SiteChiP=zero;
 | 
			
		||||
                        SiteChiM=zero;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    for(int sp=0; sp<2;  sp++){
 | 
			
		||||
                    for(int co=0; co<Nc; co++){
 | 
			
		||||
                        vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
 | 
			
		||||
                    }}
 | 
			
		||||
 | 
			
		||||
                    for(int sp=0; sp<2;  sp++){
 | 
			
		||||
                    for(int co=0; co<Nc; co++){
 | 
			
		||||
                        vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
 | 
			
		||||
                    }}
 | 
			
		||||
 | 
			
		||||
                    for(int sp=0; sp<2;  sp++){
 | 
			
		||||
                    for(int co=0; co<Nc; co++){
 | 
			
		||||
                        SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
 | 
			
		||||
                        SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
 | 
			
		||||
                    }}
 | 
			
		||||
                }}
 | 
			
		||||
 | 
			
		||||
                {
 | 
			
		||||
                    int lex = s1 + LLs*site;
 | 
			
		||||
                    for(int sp=0; sp<2;  sp++){
 | 
			
		||||
                    for(int co=0; co<Nc; co++){
 | 
			
		||||
                        vstream(chi[lex]()(sp)(co),   SiteChiP()(sp)(co));
 | 
			
		||||
                        vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
 | 
			
		||||
                    }}
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
        #else
 | 
			
		||||
        {
 | 
			
		||||
            // pointers
 | 
			
		||||
            //  MASK_REGS;
 | 
			
		||||
            #define Chi_00 %%zmm1
 | 
			
		||||
            #define Chi_01 %%zmm2
 | 
			
		||||
            #define Chi_02 %%zmm3
 | 
			
		||||
            #define Chi_10 %%zmm4
 | 
			
		||||
            #define Chi_11 %%zmm5
 | 
			
		||||
            #define Chi_12 %%zmm6
 | 
			
		||||
            #define Chi_20 %%zmm7
 | 
			
		||||
            #define Chi_21 %%zmm8
 | 
			
		||||
            #define Chi_22 %%zmm9
 | 
			
		||||
            #define Chi_30 %%zmm10
 | 
			
		||||
            #define Chi_31 %%zmm11
 | 
			
		||||
            #define Chi_32 %%zmm12
 | 
			
		||||
 | 
			
		||||
            #define BCAST0  %%zmm13
 | 
			
		||||
            #define BCAST1  %%zmm14
 | 
			
		||||
            #define BCAST2  %%zmm15
 | 
			
		||||
            #define BCAST3  %%zmm16
 | 
			
		||||
            #define BCAST4  %%zmm17
 | 
			
		||||
            #define BCAST5  %%zmm18
 | 
			
		||||
            #define BCAST6  %%zmm19
 | 
			
		||||
            #define BCAST7  %%zmm20
 | 
			
		||||
            #define BCAST8  %%zmm21
 | 
			
		||||
            #define BCAST9  %%zmm22
 | 
			
		||||
            #define BCAST10 %%zmm23
 | 
			
		||||
            #define BCAST11 %%zmm24
 | 
			
		||||
 | 
			
		||||
            int incr = LLs*LLs*sizeof(iSinglet<Simd>);
 | 
			
		||||
            for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
 | 
			
		||||
                for(int s2=0; s2<LLs; s2++){
 | 
			
		||||
 | 
			
		||||
                    int lex = s2 + LLs*site;
 | 
			
		||||
                    uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
 | 
			
		||||
                    uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
 | 
			
		||||
                    uint64_t a2 = (uint64_t) &psi[lex];
 | 
			
		||||
 | 
			
		||||
                    for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
 | 
			
		||||
                        if((s2+l)==0) {
 | 
			
		||||
                            asm(
 | 
			
		||||
                                    VPREFETCH1(0,%2)              VPREFETCH1(0,%1)
 | 
			
		||||
                                    VPREFETCH1(12,%2)  	          VPREFETCH1(13,%2)
 | 
			
		||||
                                    VPREFETCH1(14,%2)  	          VPREFETCH1(15,%2)
 | 
			
		||||
                                    VBCASTCDUP(0,%2,BCAST0)
 | 
			
		||||
                                    VBCASTCDUP(1,%2,BCAST1)
 | 
			
		||||
                                    VBCASTCDUP(2,%2,BCAST2)
 | 
			
		||||
                                    VBCASTCDUP(3,%2,BCAST3)
 | 
			
		||||
                                    VBCASTCDUP(4,%2,BCAST4)       VMULMEM(0,%0,BCAST0,Chi_00)
 | 
			
		||||
                                    VBCASTCDUP(5,%2,BCAST5)       VMULMEM(0,%0,BCAST1,Chi_01)
 | 
			
		||||
                                    VBCASTCDUP(6,%2,BCAST6)       VMULMEM(0,%0,BCAST2,Chi_02)
 | 
			
		||||
                                    VBCASTCDUP(7,%2,BCAST7)       VMULMEM(0,%0,BCAST3,Chi_10)
 | 
			
		||||
                                    VBCASTCDUP(8,%2,BCAST8)       VMULMEM(0,%0,BCAST4,Chi_11)
 | 
			
		||||
                                    VBCASTCDUP(9,%2,BCAST9)       VMULMEM(0,%0,BCAST5,Chi_12)
 | 
			
		||||
                                    VBCASTCDUP(10,%2,BCAST10)     VMULMEM(0,%1,BCAST6,Chi_20)
 | 
			
		||||
                                    VBCASTCDUP(11,%2,BCAST11)     VMULMEM(0,%1,BCAST7,Chi_21)
 | 
			
		||||
                                    VMULMEM(0,%1,BCAST8,Chi_22)
 | 
			
		||||
                                    VMULMEM(0,%1,BCAST9,Chi_30)
 | 
			
		||||
                                    VMULMEM(0,%1,BCAST10,Chi_31)
 | 
			
		||||
                                    VMULMEM(0,%1,BCAST11,Chi_32)
 | 
			
		||||
                                    : : "r" (a0), "r" (a1), "r" (a2)                            );
 | 
			
		||||
                        } else {
 | 
			
		||||
                            asm(
 | 
			
		||||
                                    VBCASTCDUP(0,%2,BCAST0)   VMADDMEM(0,%0,BCAST0,Chi_00)
 | 
			
		||||
                                    VBCASTCDUP(1,%2,BCAST1)   VMADDMEM(0,%0,BCAST1,Chi_01)
 | 
			
		||||
                                    VBCASTCDUP(2,%2,BCAST2)   VMADDMEM(0,%0,BCAST2,Chi_02)
 | 
			
		||||
                                    VBCASTCDUP(3,%2,BCAST3)   VMADDMEM(0,%0,BCAST3,Chi_10)
 | 
			
		||||
                                    VBCASTCDUP(4,%2,BCAST4)   VMADDMEM(0,%0,BCAST4,Chi_11)
 | 
			
		||||
                                    VBCASTCDUP(5,%2,BCAST5)   VMADDMEM(0,%0,BCAST5,Chi_12)
 | 
			
		||||
                                    VBCASTCDUP(6,%2,BCAST6)   VMADDMEM(0,%1,BCAST6,Chi_20)
 | 
			
		||||
                                    VBCASTCDUP(7,%2,BCAST7)   VMADDMEM(0,%1,BCAST7,Chi_21)
 | 
			
		||||
                                    VBCASTCDUP(8,%2,BCAST8)   VMADDMEM(0,%1,BCAST8,Chi_22)
 | 
			
		||||
                                    VBCASTCDUP(9,%2,BCAST9)   VMADDMEM(0,%1,BCAST9,Chi_30)
 | 
			
		||||
                                    VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
 | 
			
		||||
                                    VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
 | 
			
		||||
                                    : : "r" (a0), "r" (a1), "r" (a2)                            );
 | 
			
		||||
                        }
 | 
			
		||||
                        a0 = a0 + incr;
 | 
			
		||||
                        a1 = a1 + incr;
 | 
			
		||||
                        a2 = a2 + sizeof(typename Simd::scalar_type);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                {
 | 
			
		||||
                  int lexa = s1+LLs*site;
 | 
			
		||||
                  asm (
 | 
			
		||||
                     VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01)  VSTORE(2 ,%0,Chi_02)
 | 
			
		||||
                     VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11)  VSTORE(5 ,%0,Chi_12)
 | 
			
		||||
                     VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21)  VSTORE(8 ,%0,Chi_22)
 | 
			
		||||
                     VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31)  VSTORE(11,%0,Chi_32)
 | 
			
		||||
                     : : "r" ((uint64_t)&chi[lexa]) : "memory" );
 | 
			
		||||
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        #undef Chi_00
 | 
			
		||||
        #undef Chi_01
 | 
			
		||||
        #undef Chi_02
 | 
			
		||||
        #undef Chi_10
 | 
			
		||||
        #undef Chi_11
 | 
			
		||||
        #undef Chi_12
 | 
			
		||||
        #undef Chi_20
 | 
			
		||||
        #undef Chi_21
 | 
			
		||||
        #undef Chi_22
 | 
			
		||||
        #undef Chi_30
 | 
			
		||||
        #undef Chi_31
 | 
			
		||||
        #undef Chi_32
 | 
			
		||||
 | 
			
		||||
        #undef BCAST0
 | 
			
		||||
        #undef BCAST1
 | 
			
		||||
        #undef BCAST2
 | 
			
		||||
        #undef BCAST3
 | 
			
		||||
        #undef BCAST4
 | 
			
		||||
        #undef BCAST5
 | 
			
		||||
        #undef BCAST6
 | 
			
		||||
        #undef BCAST7
 | 
			
		||||
        #undef BCAST8
 | 
			
		||||
        #undef BCAST9
 | 
			
		||||
        #undef BCAST10
 | 
			
		||||
        #undef BCAST11
 | 
			
		||||
        #endif
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // Z-mobius version
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
 | 
			
		||||
        int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
    {
 | 
			
		||||
        std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
 | 
			
		||||
        exit(-1);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
 | 
			
		||||
    {
 | 
			
		||||
        int Ls  = this->Ls;
 | 
			
		||||
        int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
        int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
        chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
        Vector<iSinglet<Simd> > Matp;
 | 
			
		||||
        Vector<iSinglet<Simd> > Matm;
 | 
			
		||||
        Vector<iSinglet<Simd> > *_Matp;
 | 
			
		||||
        Vector<iSinglet<Simd> > *_Matm;
 | 
			
		||||
 | 
			
		||||
        //  MooeeInternalCompute(dag,inv,Matp,Matm);
 | 
			
		||||
        if(inv && dag){
 | 
			
		||||
            _Matp = &this->MatpInvDag;
 | 
			
		||||
            _Matm = &this->MatmInvDag;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(inv && (!dag)){
 | 
			
		||||
            _Matp = &this->MatpInv;
 | 
			
		||||
            _Matm = &this->MatmInv;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if(!inv){
 | 
			
		||||
            MooeeInternalCompute(dag, inv, Matp, Matm);
 | 
			
		||||
            _Matp = &Matp;
 | 
			
		||||
            _Matm = &Matm;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        assert(_Matp->size() == Ls*LLs);
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvCalls++;
 | 
			
		||||
        this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
        if(switcheroo<Coeff_t>::iscomplex()){
 | 
			
		||||
            parallel_for(auto site=0; site<vol; site++){
 | 
			
		||||
                MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            parallel_for(auto site=0; site<vol; site++){
 | 
			
		||||
                MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        this->MooeeInvTime += usecond();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #ifdef DOMAIN_WALL_EOFA_DPERP_VEC
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF);
 | 
			
		||||
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF);
 | 
			
		||||
        INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH);
 | 
			
		||||
 | 
			
		||||
        template void DomainWallEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
        template void DomainWallEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
        template void DomainWallEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,502 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid_Eigen_Dense.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
    MobiusEOFAFermion<Impl>::MobiusEOFAFermion(
 | 
			
		||||
      GaugeField            &_Umu,
 | 
			
		||||
      GridCartesian         &FiveDimGrid,
 | 
			
		||||
      GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
      GridCartesian         &FourDimGrid,
 | 
			
		||||
      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
      RealD _mq1, RealD _mq2, RealD _mq3,
 | 
			
		||||
      RealD _shift, int _pm, RealD _M5,
 | 
			
		||||
      RealD _b, RealD _c, const ImplParams &p) :
 | 
			
		||||
    AbstractEOFAFermion<Impl>(_Umu, FiveDimGrid, FiveDimRedBlackGrid,
 | 
			
		||||
        FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3,
 | 
			
		||||
        _shift, _pm, _M5, _b, _c, p)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      RealD eps = 1.0;
 | 
			
		||||
      Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls);
 | 
			
		||||
      assert(zdata->n == this->Ls);
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b <<
 | 
			
		||||
        ",c=" << _c << ") with Ls=" << Ls << std::endl;
 | 
			
		||||
      this->SetCoefficientsTanh(zdata, _b, _c);
 | 
			
		||||
      std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 <<
 | 
			
		||||
        ",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift <<
 | 
			
		||||
        ",pm=" << _pm << ")" << std::endl;
 | 
			
		||||
 | 
			
		||||
      Approx::zolotarev_free(zdata);
 | 
			
		||||
 | 
			
		||||
      if(_shift != 0.0){
 | 
			
		||||
        SetCoefficientsPrecondShiftOps();
 | 
			
		||||
      } else {
 | 
			
		||||
        Mooee_shift.resize(Ls, 0.0);
 | 
			
		||||
        MooeeInv_shift_lc.resize(Ls, 0.0);
 | 
			
		||||
        MooeeInv_shift_norm.resize(Ls, 0.0);
 | 
			
		||||
        MooeeInvDag_shift_lc.resize(Ls, 0.0);
 | 
			
		||||
        MooeeInvDag_shift_norm.resize(Ls, 0.0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /****************************************************************
 | 
			
		||||
     * Additional EOFA operators only called outside the inverter.  
 | 
			
		||||
     * Since speed is not essential, simple axpby-style
 | 
			
		||||
     * implementations should be fine.
 | 
			
		||||
     ***************************************************************/
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din, int sign, int dag)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
      RealD alpha = this->alpha;
 | 
			
		||||
 | 
			
		||||
      Din = zero;
 | 
			
		||||
      if((sign == 1) && (dag == 0)) { // \Omega_{+}
 | 
			
		||||
        for(int s=0; s<Ls; ++s){
 | 
			
		||||
          axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);
 | 
			
		||||
        }
 | 
			
		||||
      } else if((sign == -1) && (dag == 0)) { // \Omega_{-}
 | 
			
		||||
        for(int s=0; s<Ls; ++s){
 | 
			
		||||
          axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,s)/std::pow(1.0+alpha,s+1), psi, s, 0);
 | 
			
		||||
        }
 | 
			
		||||
      } else if((sign == 1 ) && (dag == 1)) { // \Omega_{+}^{\dagger}
 | 
			
		||||
        for(int sp=0; sp<Ls; ++sp){
 | 
			
		||||
          axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,Ls-sp-1)/std::pow(1.0+alpha,Ls-sp), psi, 0, sp);
 | 
			
		||||
        }
 | 
			
		||||
      } else if((sign == -1) && (dag == 1)) { // \Omega_{-}^{\dagger}
 | 
			
		||||
        for(int sp=0; sp<Ls; ++sp){
 | 
			
		||||
          axpby_ssp(Din, 1.0, Din, 2.0*std::pow(1.0-alpha,sp)/std::pow(1.0+alpha,sp+1), psi, 0, sp);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // This is the operator relating the usual Ddwf to TWQCD's EOFA Dirac operator (arXiv:1706.05843, Eqn. 6).
 | 
			
		||||
    // It also relates the preconditioned and unpreconditioned systems described in Appendix B.2.
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::Dtilde(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls    = this->Ls;
 | 
			
		||||
      RealD b   = 0.5 * ( 1.0 + this->alpha );
 | 
			
		||||
      RealD c   = 0.5 * ( 1.0 - this->alpha );
 | 
			
		||||
      RealD mq1 = this->mq1;
 | 
			
		||||
 | 
			
		||||
      for(int s=0; s<Ls; ++s){
 | 
			
		||||
        if(s == 0) {
 | 
			
		||||
          axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
 | 
			
		||||
          axpby_ssp_pplus (chi, 1.0, chi, mq1*c, psi, s, Ls-1);
 | 
			
		||||
        } else if(s == (Ls-1)) {
 | 
			
		||||
          axpby_ssp_pminus(chi, b, psi, mq1*c, psi, s, 0);
 | 
			
		||||
          axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
 | 
			
		||||
        } else {
 | 
			
		||||
          axpby_ssp_pminus(chi, b, psi, -c, psi, s, s+1);
 | 
			
		||||
          axpby_ssp_pplus (chi, 1.0, chi, -c, psi, s, s-1);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::DtildeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
      RealD m = this->mq1;
 | 
			
		||||
      RealD c = 0.5 * this->alpha;
 | 
			
		||||
      RealD d = 0.5;
 | 
			
		||||
 | 
			
		||||
      RealD DtInv_p(0.0), DtInv_m(0.0);
 | 
			
		||||
      RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls);
 | 
			
		||||
      FermionField tmp(this->FermionGrid());
 | 
			
		||||
 | 
			
		||||
      for(int s=0; s<Ls; ++s){
 | 
			
		||||
      for(int sp=0; sp<Ls; ++sp){
 | 
			
		||||
 | 
			
		||||
        DtInv_p = m * std::pow(-1.0,s-sp+1) * std::pow(c-d,Ls+s-sp) / std::pow(c+d,s-sp+1) / N;
 | 
			
		||||
        DtInv_p += (s < sp) ? 0.0 : std::pow(-1.0,s-sp) * std::pow(c-d,s-sp) / std::pow(c+d,s-sp+1);
 | 
			
		||||
        DtInv_m = m * std::pow(-1.0,sp-s+1) * std::pow(c-d,Ls+sp-s) / std::pow(c+d,sp-s+1) / N;
 | 
			
		||||
        DtInv_m += (s > sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1);
 | 
			
		||||
 | 
			
		||||
        if(sp == 0){
 | 
			
		||||
          axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp);
 | 
			
		||||
          axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp);
 | 
			
		||||
        } else {
 | 
			
		||||
          axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp);
 | 
			
		||||
          axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      }}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /*****************************************************************************************************/
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    RealD MobiusEOFAFermion<Impl>::M(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      FermionField Din(psi._grid);
 | 
			
		||||
 | 
			
		||||
      this->Meooe5D(psi, Din);
 | 
			
		||||
      this->DW(Din, chi, DaggerNo);
 | 
			
		||||
      axpby(chi, 1.0, 1.0, chi, psi);
 | 
			
		||||
      this->M5D(psi, chi);
 | 
			
		||||
      return(norm2(chi));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    RealD MobiusEOFAFermion<Impl>::Mdag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      FermionField Din(psi._grid);
 | 
			
		||||
 | 
			
		||||
      this->DW(psi, Din, DaggerYes);
 | 
			
		||||
      this->MeooeDag5D(Din, chi);
 | 
			
		||||
      this->M5Ddag(psi, chi);
 | 
			
		||||
      axpby(chi, 1.0, 1.0, chi, psi);
 | 
			
		||||
      return(norm2(chi));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /********************************************************************
 | 
			
		||||
     * Performance critical fermion operators called inside the inverter
 | 
			
		||||
     ********************************************************************/
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      std::vector<Coeff_t> diag(Ls,1.0);
 | 
			
		||||
      std::vector<Coeff_t> upper(Ls,-1.0);  upper[Ls-1] = this->mq1;
 | 
			
		||||
      std::vector<Coeff_t> lower(Ls,-1.0);  lower[0]    = this->mq1;
 | 
			
		||||
 | 
			
		||||
      // no shift term
 | 
			
		||||
      if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); }
 | 
			
		||||
 | 
			
		||||
      // fused M + shift operation
 | 
			
		||||
      else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      std::vector<Coeff_t> diag(Ls,1.0);
 | 
			
		||||
      std::vector<Coeff_t> upper(Ls,-1.0);  upper[Ls-1] = this->mq1;
 | 
			
		||||
      std::vector<Coeff_t> lower(Ls,-1.0);  lower[0]    = this->mq1;
 | 
			
		||||
 | 
			
		||||
      // no shift term
 | 
			
		||||
      if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); }
 | 
			
		||||
 | 
			
		||||
      // fused M + shift operation
 | 
			
		||||
      else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // half checkerboard operations
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::Mooee(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      // coefficients of Mooee
 | 
			
		||||
      std::vector<Coeff_t> diag = this->bee;
 | 
			
		||||
      std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
      std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        upper[s] = -this->cee[s];
 | 
			
		||||
        lower[s] = -this->cee[s];
 | 
			
		||||
      }
 | 
			
		||||
      upper[Ls-1] *= -this->mq1;
 | 
			
		||||
      lower[0]    *= -this->mq1;
 | 
			
		||||
 | 
			
		||||
      // no shift term
 | 
			
		||||
      if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); }
 | 
			
		||||
 | 
			
		||||
      // fused M + shift operation
 | 
			
		||||
      else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::MooeeDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      // coefficients of MooeeDag
 | 
			
		||||
      std::vector<Coeff_t> diag = this->bee;
 | 
			
		||||
      std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
      std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        if(s==0) {
 | 
			
		||||
          upper[s] = -this->cee[s+1];
 | 
			
		||||
          lower[s] = this->mq1*this->cee[Ls-1];
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          upper[s] = this->mq1*this->cee[0];
 | 
			
		||||
          lower[s] = -this->cee[s-1];
 | 
			
		||||
        } else {
 | 
			
		||||
          upper[s] = -this->cee[s+1];
 | 
			
		||||
          lower[s] = -this->cee[s-1];
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // no shift term
 | 
			
		||||
      if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); }
 | 
			
		||||
 | 
			
		||||
      // fused M + shift operation
 | 
			
		||||
      else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /****************************************************************************************/
 | 
			
		||||
 | 
			
		||||
    // Computes coefficients for applying Cayley preconditioned shift operators
 | 
			
		||||
    //  (Mooee + \Delta) --> Mooee_shift
 | 
			
		||||
    //  (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm
 | 
			
		||||
    //  (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm
 | 
			
		||||
    // For the latter two cases, the operation takes the form
 | 
			
		||||
    //  [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} +
 | 
			
		||||
    //      ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} )
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::SetCoefficientsPrecondShiftOps()
 | 
			
		||||
    {
 | 
			
		||||
      int   Ls    = this->Ls;
 | 
			
		||||
      int   pm    = this->pm;
 | 
			
		||||
      RealD alpha = this->alpha;
 | 
			
		||||
      RealD k     = this->k;
 | 
			
		||||
      RealD mq1   = this->mq1;
 | 
			
		||||
      RealD shift = this->shift;
 | 
			
		||||
 | 
			
		||||
      // Initialize
 | 
			
		||||
      Mooee_shift.resize(Ls);
 | 
			
		||||
      MooeeInv_shift_lc.resize(Ls);
 | 
			
		||||
      MooeeInv_shift_norm.resize(Ls);
 | 
			
		||||
      MooeeInvDag_shift_lc.resize(Ls);
 | 
			
		||||
      MooeeInvDag_shift_norm.resize(Ls);
 | 
			
		||||
 | 
			
		||||
      // Construct Mooee_shift
 | 
			
		||||
      int idx(0);
 | 
			
		||||
      Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) *
 | 
			
		||||
                  ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
 | 
			
		||||
      for(int s=0; s<Ls; ++s){
 | 
			
		||||
        idx = (pm == 1) ? (s) : (Ls-1-s);
 | 
			
		||||
        Mooee_shift[idx] = N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // Tridiagonal solve for MooeeInvDag_shift_lc
 | 
			
		||||
      {
 | 
			
		||||
        Coeff_t m(0.0);
 | 
			
		||||
        std::vector<Coeff_t> d = Mooee_shift;
 | 
			
		||||
        std::vector<Coeff_t> u(Ls,0.0);
 | 
			
		||||
        std::vector<Coeff_t> y(Ls,0.0);
 | 
			
		||||
        std::vector<Coeff_t> q(Ls,0.0);
 | 
			
		||||
        if(pm == 1){ u[0] = 1.0; }
 | 
			
		||||
        else{ u[Ls-1] = 1.0; }
 | 
			
		||||
 | 
			
		||||
        // Tridiagonal matrix algorithm + Sherman-Morrison formula
 | 
			
		||||
        //
 | 
			
		||||
        // We solve
 | 
			
		||||
        //  ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift
 | 
			
		||||
        // where Mooee' is the tridiagonal part of Mooee_{+}, and
 | 
			
		||||
        // u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen
 | 
			
		||||
        // so that the outer-product u \otimes v gives the (0,Ls-1)
 | 
			
		||||
        // entry of Mooee_{+}.
 | 
			
		||||
        //
 | 
			
		||||
        // We do this as two solves: Mooee'*y = d and Mooee'*q = u,
 | 
			
		||||
        // and then construct the solution to the original system
 | 
			
		||||
        //  MooeeInvDag_shift_lc = y - <v,y> / ( 1 + <v,q> ) q
 | 
			
		||||
        if(pm == 1){
 | 
			
		||||
          for(int s=1; s<Ls; ++s){
 | 
			
		||||
            m = -this->cee[s] / this->bee[s-1];
 | 
			
		||||
            d[s] -= m*d[s-1];
 | 
			
		||||
            u[s] -= m*u[s-1];
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        y[Ls-1] = d[Ls-1] / this->bee[Ls-1];
 | 
			
		||||
        q[Ls-1] = u[Ls-1] / this->bee[Ls-1];
 | 
			
		||||
        for(int s=Ls-2; s>=0; --s){
 | 
			
		||||
          if(pm == 1){
 | 
			
		||||
            y[s] = d[s] / this->bee[s];
 | 
			
		||||
            q[s] = u[s] / this->bee[s];
 | 
			
		||||
          } else {
 | 
			
		||||
            y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s];
 | 
			
		||||
            q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s];
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Construct MooeeInvDag_shift_lc
 | 
			
		||||
        for(int s=0; s<Ls; ++s){
 | 
			
		||||
          if(pm == 1){
 | 
			
		||||
            MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[0]*y[Ls-1] /
 | 
			
		||||
              (1.0+mq1*this->cee[0]*q[Ls-1]) * q[s];
 | 
			
		||||
          } else {
 | 
			
		||||
            MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] /
 | 
			
		||||
              (1.0+mq1*this->cee[Ls-1]*q[0]) * q[s];
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Compute remaining coefficients
 | 
			
		||||
        N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]);
 | 
			
		||||
        for(int s=0; s<Ls; ++s){
 | 
			
		||||
 | 
			
		||||
          // MooeeInv_shift_lc
 | 
			
		||||
          if(pm == 1){ MooeeInv_shift_lc[s] = std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s); }
 | 
			
		||||
          else{ MooeeInv_shift_lc[s] = std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s); }
 | 
			
		||||
 | 
			
		||||
          // MooeeInv_shift_norm
 | 
			
		||||
          MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] /
 | 
			
		||||
            ( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N;
 | 
			
		||||
 | 
			
		||||
          // MooeeInvDag_shift_norm
 | 
			
		||||
          if(pm == 1){ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s) /
 | 
			
		||||
            ( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
 | 
			
		||||
          else{ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s) /
 | 
			
		||||
            ( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Recompute coefficients for a different value of shift constant
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::RefreshShiftCoefficients(RealD new_shift)
 | 
			
		||||
    {
 | 
			
		||||
      this->shift = new_shift;
 | 
			
		||||
      if(new_shift != 0.0){
 | 
			
		||||
        SetCoefficientsPrecondShiftOps();
 | 
			
		||||
      } else {
 | 
			
		||||
        int Ls = this->Ls;
 | 
			
		||||
        Mooee_shift.resize(Ls,0.0);
 | 
			
		||||
        MooeeInv_shift_lc.resize(Ls,0.0);
 | 
			
		||||
        MooeeInv_shift_norm.resize(Ls,0.0);
 | 
			
		||||
        MooeeInvDag_shift_lc.resize(Ls,0.0);
 | 
			
		||||
        MooeeInvDag_shift_norm.resize(Ls,0.0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    void MobiusEOFAFermion<Impl>::MooeeInternalCompute(int dag, int inv,
 | 
			
		||||
      Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
    {
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
      GridBase* grid = this->FermionRedBlackGrid();
 | 
			
		||||
      int LLs = grid->_rdimensions[0];
 | 
			
		||||
 | 
			
		||||
      if(LLs == Ls){ return; } // Not vectorised in 5th direction
 | 
			
		||||
 | 
			
		||||
      Eigen::MatrixXcd Pplus  = Eigen::MatrixXcd::Zero(Ls,Ls);
 | 
			
		||||
      Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
 | 
			
		||||
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        Pplus(s,s)  = this->bee[s];
 | 
			
		||||
        Pminus(s,s) = this->bee[s];
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        Pminus(s,s+1) = -this->cee[s];
 | 
			
		||||
        Pplus(s+1,s) = -this->cee[s+1];
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      Pplus (0,Ls-1) = this->mq1*this->cee[0];
 | 
			
		||||
      Pminus(Ls-1,0) = this->mq1*this->cee[Ls-1];
 | 
			
		||||
 | 
			
		||||
      if(this->shift != 0.0){
 | 
			
		||||
        RealD c = 0.5 * this->alpha;
 | 
			
		||||
        RealD d = 0.5;
 | 
			
		||||
        RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) );
 | 
			
		||||
        if(this->pm == 1) {
 | 
			
		||||
          for(int s=0; s<Ls; ++s){
 | 
			
		||||
            Pplus(s,Ls-1) += N * std::pow(-1.0,s) * std::pow(c-d,s) / std::pow(c+d,Ls+s+1);
 | 
			
		||||
          }
 | 
			
		||||
        } else {
 | 
			
		||||
          for(int s=0; s<Ls; ++s){
 | 
			
		||||
            Pminus(s,0) += N * std::pow(-1.0,s+1) * std::pow(c-d,Ls-1-s) / std::pow(c+d,2*Ls-s);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      Eigen::MatrixXcd PplusMat ;
 | 
			
		||||
      Eigen::MatrixXcd PminusMat;
 | 
			
		||||
 | 
			
		||||
      if(inv) {
 | 
			
		||||
        PplusMat  = Pplus.inverse();
 | 
			
		||||
        PminusMat = Pminus.inverse();
 | 
			
		||||
      } else {
 | 
			
		||||
        PplusMat  = Pplus;
 | 
			
		||||
        PminusMat = Pminus;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      if(dag){
 | 
			
		||||
        PplusMat.adjointInPlace();
 | 
			
		||||
        PminusMat.adjointInPlace();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      typedef typename SiteHalfSpinor::scalar_type scalar_type;
 | 
			
		||||
      const int Nsimd = Simd::Nsimd();
 | 
			
		||||
      Matp.resize(Ls*LLs);
 | 
			
		||||
      Matm.resize(Ls*LLs);
 | 
			
		||||
 | 
			
		||||
      for(int s2=0; s2<Ls; s2++){
 | 
			
		||||
      for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
        int istride = LLs;
 | 
			
		||||
        int ostride = 1;
 | 
			
		||||
        Simd Vp;
 | 
			
		||||
        Simd Vm;
 | 
			
		||||
        scalar_type *sp = (scalar_type*) &Vp;
 | 
			
		||||
        scalar_type *sm = (scalar_type*) &Vm;
 | 
			
		||||
        for(int l=0; l<Nsimd; l++){
 | 
			
		||||
          if(switcheroo<Coeff_t>::iscomplex()) {
 | 
			
		||||
            sp[l] = PplusMat (l*istride+s1*ostride,s2);
 | 
			
		||||
            sm[l] = PminusMat(l*istride+s1*ostride,s2);
 | 
			
		||||
          } else {
 | 
			
		||||
            // if real
 | 
			
		||||
            scalar_type tmp;
 | 
			
		||||
            tmp = PplusMat (l*istride+s1*ostride,s2);
 | 
			
		||||
            sp[l] = scalar_type(tmp.real(),tmp.real());
 | 
			
		||||
            tmp = PminusMat(l*istride+s1*ostride,s2);
 | 
			
		||||
            sm[l] = scalar_type(tmp.real(),tmp.real());
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        Matp[LLs*s2+s1] = Vp;
 | 
			
		||||
        Matm[LLs*s2+s1] = Vm;
 | 
			
		||||
      }}
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  FermOpTemplateInstantiate(MobiusEOFAFermion);
 | 
			
		||||
  GparityFermOpTemplateInstantiate(MobiusEOFAFermion);
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,133 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_QCD_MOBIUS_EOFA_FERMION_H
 | 
			
		||||
#define  GRID_QCD_MOBIUS_EOFA_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/AbstractEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  class MobiusEOFAFermion : public AbstractEOFAFermion<Impl>
 | 
			
		||||
  {
 | 
			
		||||
    public:
 | 
			
		||||
      INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
      // Shift operator coefficients for red-black preconditioned Mobius EOFA
 | 
			
		||||
      std::vector<Coeff_t> Mooee_shift;
 | 
			
		||||
      std::vector<Coeff_t> MooeeInv_shift_lc;
 | 
			
		||||
      std::vector<Coeff_t> MooeeInv_shift_norm;
 | 
			
		||||
      std::vector<Coeff_t> MooeeInvDag_shift_lc;
 | 
			
		||||
      std::vector<Coeff_t> MooeeInvDag_shift_norm;
 | 
			
		||||
 | 
			
		||||
      virtual void Instantiatable(void) {};
 | 
			
		||||
 | 
			
		||||
      // EOFA-specific operations
 | 
			
		||||
      virtual void  Omega            (const FermionField& in, FermionField& out, int sign, int dag);
 | 
			
		||||
      virtual void  Dtilde           (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  DtildeInv        (const FermionField& in, FermionField& out);
 | 
			
		||||
 | 
			
		||||
      // override multiply
 | 
			
		||||
      virtual RealD M                (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual RealD Mdag             (const FermionField& in, FermionField& out);
 | 
			
		||||
 | 
			
		||||
      // half checkerboard operations
 | 
			
		||||
      virtual void  Mooee            (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  MooeeDag         (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  MooeeInv         (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  MooeeInv_shift   (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  MooeeInvDag      (const FermionField& in, FermionField& out);
 | 
			
		||||
      virtual void  MooeeInvDag_shift(const FermionField& in, FermionField& out);
 | 
			
		||||
 | 
			
		||||
      virtual void   M5D             (const FermionField& psi, FermionField& chi);
 | 
			
		||||
      virtual void   M5Ddag          (const FermionField& psi, FermionField& chi);
 | 
			
		||||
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // Instantiate different versions depending on Impl
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi,
 | 
			
		||||
        std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
 | 
			
		||||
 | 
			
		||||
      void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
 | 
			
		||||
        std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
        std::vector<Coeff_t>& shift_coeffs);
 | 
			
		||||
 | 
			
		||||
      void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi,
 | 
			
		||||
        std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper);
 | 
			
		||||
 | 
			
		||||
      void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi,
 | 
			
		||||
        std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
        std::vector<Coeff_t>& shift_coeffs);
 | 
			
		||||
 | 
			
		||||
      void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
      void MooeeInternalCompute(int dag, int inv, Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
 | 
			
		||||
 | 
			
		||||
      void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site,
 | 
			
		||||
        Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
 | 
			
		||||
 | 
			
		||||
      void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site,
 | 
			
		||||
        Vector<iSinglet<Simd>>& Matp, Vector<iSinglet<Simd>>& Matm);
 | 
			
		||||
 | 
			
		||||
      virtual void RefreshShiftCoefficients(RealD new_shift);
 | 
			
		||||
 | 
			
		||||
      // Constructors
 | 
			
		||||
      MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid,
 | 
			
		||||
        GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid,
 | 
			
		||||
        RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm,
 | 
			
		||||
        RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams());
 | 
			
		||||
 | 
			
		||||
    protected:
 | 
			
		||||
      void SetCoefficientsPrecondShiftOps(void);
 | 
			
		||||
  };
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\
 | 
			
		||||
template void MobiusEOFAFermion<A>::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \
 | 
			
		||||
  std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
 | 
			
		||||
  std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \
 | 
			
		||||
  std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \
 | 
			
		||||
  std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& shift_coeffs); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInv(const FermionField& psi, FermionField& chi); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInv_shift(const FermionField& psi, FermionField& chi); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInvDag(const FermionField& psi, FermionField& chi); \
 | 
			
		||||
template void MobiusEOFAFermion<A>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi);
 | 
			
		||||
 | 
			
		||||
#undef  MOBIUS_EOFA_DPERP_DENSE
 | 
			
		||||
#define MOBIUS_EOFA_DPERP_CACHE
 | 
			
		||||
#undef  MOBIUS_EOFA_DPERP_LINALG
 | 
			
		||||
#define MOBIUS_EOFA_DPERP_VEC
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,429 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
    std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        auto tmp = psi._odata[0];
 | 
			
		||||
        if(s==0){
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+0]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else {
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
    std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
 | 
			
		||||
    std::vector<Coeff_t> &shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        auto tmp = psi._odata[0];
 | 
			
		||||
        if(s==0){
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+0]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else {
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        }
 | 
			
		||||
        if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); }
 | 
			
		||||
        else{ spProj5m(tmp, psi._odata[ss+shift_s]); }
 | 
			
		||||
        chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
    std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        if(s==0) {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+0]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
 | 
			
		||||
    std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
 | 
			
		||||
    std::vector<Coeff_t> &shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // Flops = 6.0*(Nc*Ns) *Ls*vol
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
      chi[ss+Ls-1] = zero;
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        if(s==0) {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+Ls-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else if(s==(Ls-1)) {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+0]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        } else {
 | 
			
		||||
          spProj5p(tmp, psi._odata[ss+s+1]);
 | 
			
		||||
          chi[ss+s] = diag[s]*phi[ss+s] + upper[s]*tmp;
 | 
			
		||||
          spProj5m(tmp, psi._odata[ss+s-1]);
 | 
			
		||||
          chi[ss+s] = chi[ss+s] + lower[s]*tmp;
 | 
			
		||||
        }
 | 
			
		||||
        if(this->pm == 1){ spProj5p(tmp, psi._odata[ss+s]); }
 | 
			
		||||
        else{ spProj5m(tmp, psi._odata[ss+s]); }
 | 
			
		||||
        chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
      // Apply (L^{\prime})^{-1}
 | 
			
		||||
      chi[ss] = psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
      for(int s=1; s<Ls; s++){
 | 
			
		||||
        spProj5p(tmp, chi[ss+s-1]);
 | 
			
		||||
        chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // L_m^{-1}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
        spProj5m(tmp, chi[ss+s]);
 | 
			
		||||
        chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // U_m^{-1} D^{-1}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
 | 
			
		||||
        spProj5p(tmp, chi[ss+Ls-1]);
 | 
			
		||||
        chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp;
 | 
			
		||||
      }
 | 
			
		||||
      chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
 | 
			
		||||
      // Apply U^{-1}
 | 
			
		||||
      for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
        spProj5m(tmp, chi[ss+s+1]);
 | 
			
		||||
        chi[ss+s] = chi[ss+s] - this->uee[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
      auto tmp1        = psi._odata[0];
 | 
			
		||||
      auto tmp2        = psi._odata[0];
 | 
			
		||||
      auto tmp2_spProj = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
      // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2
 | 
			
		||||
      chi[ss] = psi[ss]; // chi[0]=psi[0]
 | 
			
		||||
      tmp2 = MooeeInv_shift_lc[0]*psi[ss];
 | 
			
		||||
      for(int s=1; s<Ls; s++){
 | 
			
		||||
        spProj5p(tmp1, chi[ss+s-1]);
 | 
			
		||||
        chi[ss+s] = psi[ss+s] - this->lee[s-1]*tmp1;
 | 
			
		||||
        tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s];
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
 | 
			
		||||
      else{ spProj5m(tmp2_spProj, tmp2); }
 | 
			
		||||
 | 
			
		||||
      // L_m^{-1}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
        spProj5m(tmp1, chi[ss+s]);
 | 
			
		||||
        chi[ss+Ls-1] = chi[ss+Ls-1] - this->leem[s]*tmp1;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // U_m^{-1} D^{-1}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){ // Chi[s] + 1/d chi[s]
 | 
			
		||||
        spProj5p(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
        chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1;
 | 
			
		||||
      }
 | 
			
		||||
      // chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
      chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
      spProj5m(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
 | 
			
		||||
      // Apply U^{-1} and add shift term
 | 
			
		||||
      for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
        chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1;
 | 
			
		||||
        spProj5m(tmp1, chi[ss+s]);
 | 
			
		||||
        chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
      auto tmp = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
      // Apply (U^{\prime})^{-dag}
 | 
			
		||||
      chi[ss] = psi[ss];
 | 
			
		||||
      for(int s=1; s<Ls; s++){
 | 
			
		||||
        spProj5m(tmp, chi[ss+s-1]);
 | 
			
		||||
        chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // U_m^{-\dag}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        spProj5p(tmp, chi[ss+s]);
 | 
			
		||||
        chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // L_m^{-\dag} D^{-dag}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        spProj5m(tmp, chi[ss+Ls-1]);
 | 
			
		||||
        chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp;
 | 
			
		||||
      }
 | 
			
		||||
      chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
 | 
			
		||||
      // Apply L^{-dag}
 | 
			
		||||
      for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
        spProj5p(tmp, chi[ss+s+1]);
 | 
			
		||||
        chi[ss+s] = chi[ss+s] - this->lee[s]*tmp;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase *grid = psi._grid;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
 | 
			
		||||
 | 
			
		||||
      auto tmp1        = psi._odata[0];
 | 
			
		||||
      auto tmp2        = psi._odata[0];
 | 
			
		||||
      auto tmp2_spProj = psi._odata[0];
 | 
			
		||||
 | 
			
		||||
      // Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2
 | 
			
		||||
      chi[ss] = psi[ss];
 | 
			
		||||
      tmp2 = MooeeInvDag_shift_lc[0]*psi[ss];
 | 
			
		||||
      for(int s=1; s<Ls; s++){
 | 
			
		||||
        spProj5m(tmp1, chi[ss+s-1]);
 | 
			
		||||
        chi[ss+s] = psi[ss+s] - this->uee[s-1]*tmp1;
 | 
			
		||||
        tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s];
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);}
 | 
			
		||||
      else{ spProj5m(tmp2_spProj, tmp2); }
 | 
			
		||||
 | 
			
		||||
      // U_m^{-\dag}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        spProj5p(tmp1, chi[ss+s]);
 | 
			
		||||
        chi[ss+Ls-1] = chi[ss+Ls-1] - this->ueem[s]*tmp1;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // L_m^{-\dag} D^{-dag}
 | 
			
		||||
      for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        spProj5m(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
        chi[ss+s] = (1.0/this->dee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1;
 | 
			
		||||
      }
 | 
			
		||||
      chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1];
 | 
			
		||||
      spProj5p(tmp1, chi[ss+Ls-1]);
 | 
			
		||||
      chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj;
 | 
			
		||||
 | 
			
		||||
      // Apply L^{-dag}
 | 
			
		||||
      for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
        chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1;
 | 
			
		||||
        spProj5p(tmp1, chi[ss+s]);
 | 
			
		||||
        chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_CACHE
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,184 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid_Eigen_Dense.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
  * Dense matrix versions of routines
 | 
			
		||||
  */
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
    int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
    int pm      = this->pm;
 | 
			
		||||
    RealD shift = this->shift;
 | 
			
		||||
    RealD alpha = this->alpha;
 | 
			
		||||
    RealD k     = this->k;
 | 
			
		||||
    RealD mq1   = this->mq1;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    assert(Ls==LLs);
 | 
			
		||||
 | 
			
		||||
    Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
    Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
        Pplus(s,s)  = this->bee[s];
 | 
			
		||||
        Pminus(s,s) = this->bee[s];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        Pminus(s,s+1) = -this->cee[s];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
        Pplus(s+1,s) = -this->cee[s+1];
 | 
			
		||||
    }
 | 
			
		||||
    Pplus (0,Ls-1) = mq1*this->cee[0];
 | 
			
		||||
    Pminus(Ls-1,0) = mq1*this->cee[Ls-1];
 | 
			
		||||
 | 
			
		||||
    if(shift != 0.0){
 | 
			
		||||
      Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) );
 | 
			
		||||
      for(int s=0; s<Ls; ++s){
 | 
			
		||||
        if(pm == 1){ Pplus(s,Ls-1) += shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
 | 
			
		||||
        else{ Pminus(Ls-1-s,Ls-1) -= shift * k * N * std::pow(-1.0,s) * std::pow(alpha-1.0,s) / std::pow(alpha+1.0,Ls+s+1); }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Eigen::MatrixXd PplusMat ;
 | 
			
		||||
    Eigen::MatrixXd PminusMat;
 | 
			
		||||
 | 
			
		||||
    if(inv){
 | 
			
		||||
      PplusMat  = Pplus.inverse();
 | 
			
		||||
      PminusMat = Pminus.inverse();
 | 
			
		||||
    } else {
 | 
			
		||||
      PplusMat  = Pplus;
 | 
			
		||||
      PminusMat = Pminus;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(dag){
 | 
			
		||||
      PplusMat.adjointInPlace();
 | 
			
		||||
      PminusMat.adjointInPlace();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // For the non-vectorised s-direction this is simple
 | 
			
		||||
 | 
			
		||||
    for(auto site=0; site<vol; site++){
 | 
			
		||||
 | 
			
		||||
        SiteSpinor     SiteChi;
 | 
			
		||||
        SiteHalfSpinor SitePplus;
 | 
			
		||||
        SiteHalfSpinor SitePminus;
 | 
			
		||||
 | 
			
		||||
        for(int s1=0; s1<Ls; s1++){
 | 
			
		||||
            SiteChi = zero;
 | 
			
		||||
            for(int s2=0; s2<Ls; s2++){
 | 
			
		||||
                int lex2 = s2 + Ls*site;
 | 
			
		||||
                if(PplusMat(s1,s2) != 0.0){
 | 
			
		||||
                    spProj5p(SitePplus,psi[lex2]);
 | 
			
		||||
                    accumRecon5p(SiteChi, PplusMat(s1,s2)*SitePplus);
 | 
			
		||||
                }
 | 
			
		||||
                if(PminusMat(s1,s2) != 0.0){
 | 
			
		||||
                    spProj5m(SitePminus, psi[lex2]);
 | 
			
		||||
                    accumRecon5m(SiteChi, PminusMat(s1,s2)*SitePminus);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            chi[s1+Ls*site] = SiteChi*0.5;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_DENSE
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
    template void MobiusEOFAFermion<GparityWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<GparityWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<WilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<WilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZWilsonImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZWilsonImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
    template void MobiusEOFAFermion<GparityWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<GparityWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<WilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<WilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZWilsonImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZWilsonImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,290 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  // FIXME -- make a version of these routines with site loop outermost for cache reuse.
 | 
			
		||||
  // Pminus fowards
 | 
			
		||||
  // Pplus  backwards
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
      } else if (s==(Ls-1)) {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
        axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
    std::vector<Coeff_t>& shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
      } else if (s==(Ls-1)) {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
        axpby_ssp_pplus (chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
        axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pplus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
      } else if (s==(Ls-1)) {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
    std::vector<Coeff_t>& shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
    for(int s=0; s<Ls; s++){
 | 
			
		||||
      if(s==0) {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, Ls-1);
 | 
			
		||||
      } else if (s==(Ls-1)) {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, 0);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      } else {
 | 
			
		||||
        axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s+1);
 | 
			
		||||
        axpby_ssp_pminus(chi, one, chi, lower[s], psi, s, s-1);
 | 
			
		||||
      }
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    Coeff_t czero(0.0);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    // Apply (L^{\prime})^{-1}
 | 
			
		||||
    axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
      axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-1} D^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
 | 
			
		||||
    }
 | 
			
		||||
    axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
    // Apply U^{-1}
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1);  // chi[Ls]
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    Coeff_t czero(0.0);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
    // Apply (L^{\prime})^{-1}
 | 
			
		||||
    axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
    axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0);
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, psi, -this->lee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1]
 | 
			
		||||
      axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
 | 
			
		||||
      axpby_ssp_pminus(chi, one, chi, -this->leem[s], chi, Ls-1, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-1} D^{-1}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one/this->dee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1);
 | 
			
		||||
    }
 | 
			
		||||
    axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
    // Apply U^{-1} and add shift term
 | 
			
		||||
    if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
 | 
			
		||||
    else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); }
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1);  // chi[Ls]
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
 | 
			
		||||
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    Coeff_t czero(0.0);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    // Apply (U^{\prime})^{-dagger}
 | 
			
		||||
    axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-\dagger}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
 | 
			
		||||
    }
 | 
			
		||||
    axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
    // Apply L^{-dagger}
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1);  // chi[Ls]
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    Coeff_t one(1.0);
 | 
			
		||||
    Coeff_t czero(0.0);
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
    int Ls = this->Ls;
 | 
			
		||||
 | 
			
		||||
    FermionField tmp(psi._grid);
 | 
			
		||||
 | 
			
		||||
    // Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0]
 | 
			
		||||
    axpby_ssp(chi, one, psi, czero, psi, 0, 0);      // chi[0]=psi[0]
 | 
			
		||||
    axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0);
 | 
			
		||||
    for(int s=1; s<Ls; s++){
 | 
			
		||||
      axpby_ssp_pminus(chi, one, psi, -conjugate(this->uee[s-1]), chi, s, s-1);
 | 
			
		||||
      axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // U_m^{-\dagger}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, chi, -conjugate(this->ueem[s]), chi, Ls-1, s);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // L_m^{-\dagger} D^{-dagger}
 | 
			
		||||
    for(int s=0; s<Ls-1; s++){
 | 
			
		||||
      axpby_ssp_pminus(chi, one/conjugate(this->dee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1);
 | 
			
		||||
    }
 | 
			
		||||
    axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1);
 | 
			
		||||
 | 
			
		||||
    // Apply L^{-dagger} and add shift
 | 
			
		||||
    if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
 | 
			
		||||
    else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); }
 | 
			
		||||
    for(int s=Ls-2; s>=0; s--){
 | 
			
		||||
      axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1);  // chi[Ls]
 | 
			
		||||
      if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
 | 
			
		||||
      else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_LINALG
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD);
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,983 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
  * Dense matrix versions of routines
 | 
			
		||||
  */
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerNo, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, FermionField& chi)
 | 
			
		||||
  {
 | 
			
		||||
    this->MooeeInternal(psi, chi, DaggerYes, InverseYes);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase* grid  = psi._grid;
 | 
			
		||||
    int Ls          = this->Ls;
 | 
			
		||||
    int LLs         = grid->_rdimensions[0];
 | 
			
		||||
    const int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
    Vector<iSinglet<Simd>> u(LLs);
 | 
			
		||||
    Vector<iSinglet<Simd>> l(LLs);
 | 
			
		||||
    Vector<iSinglet<Simd>> d(LLs);
 | 
			
		||||
 | 
			
		||||
    assert(Ls/LLs == nsimd);
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // just directly address via type pun
 | 
			
		||||
    typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
    scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
    scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
    scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
 | 
			
		||||
    for(int o=0; o<LLs; o++){ // outer
 | 
			
		||||
    for(int i=0; i<nsimd; i++){ //inner
 | 
			
		||||
      int s   = o + i*LLs;
 | 
			
		||||
      int ss  = o*nsimd + i;
 | 
			
		||||
      u_p[ss] = upper[s];
 | 
			
		||||
      l_p[ss] = lower[s];
 | 
			
		||||
      d_p[ss] = diag[s];
 | 
			
		||||
    }}
 | 
			
		||||
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    assert(Nc == 3);
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
      #if 0
 | 
			
		||||
 | 
			
		||||
        alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
        alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
        alignas(64) SiteSpinor fp;
 | 
			
		||||
        alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          int vp = (v+1)%LLs;
 | 
			
		||||
          int vm = (v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
          spProj5m(hp, psi[ss+vp]);
 | 
			
		||||
          spProj5p(hm, psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
          if (vp <= v){ rotate(hp, hp, 1); }
 | 
			
		||||
          if (vm >= v){ rotate(hm, hm, nsimd-1); }
 | 
			
		||||
 | 
			
		||||
          hp = 0.5*hp;
 | 
			
		||||
          hm = 0.5*hm;
 | 
			
		||||
 | 
			
		||||
          spRecon5m(fp, hp);
 | 
			
		||||
          spRecon5p(fm, hm);
 | 
			
		||||
 | 
			
		||||
          chi[ss+v] = d[v]*phi[ss+v];
 | 
			
		||||
          chi[ss+v] = chi[ss+v] + u[v]*fp;
 | 
			
		||||
          chi[ss+v] = chi[ss+v] + l[v]*fm;
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      #else
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
          int vp = (v == LLs-1) ? 0     : v+1;
 | 
			
		||||
          int vm = (v == 0)     ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
          Simd hp_00 = psi[ss+vp]()(2)(0);
 | 
			
		||||
          Simd hp_01 = psi[ss+vp]()(2)(1);
 | 
			
		||||
          Simd hp_02 = psi[ss+vp]()(2)(2);
 | 
			
		||||
          Simd hp_10 = psi[ss+vp]()(3)(0);
 | 
			
		||||
          Simd hp_11 = psi[ss+vp]()(3)(1);
 | 
			
		||||
          Simd hp_12 = psi[ss+vp]()(3)(2);
 | 
			
		||||
 | 
			
		||||
          Simd hm_00 = psi[ss+vm]()(0)(0);
 | 
			
		||||
          Simd hm_01 = psi[ss+vm]()(0)(1);
 | 
			
		||||
          Simd hm_02 = psi[ss+vm]()(0)(2);
 | 
			
		||||
          Simd hm_10 = psi[ss+vm]()(1)(0);
 | 
			
		||||
          Simd hm_11 = psi[ss+vm]()(1)(1);
 | 
			
		||||
          Simd hm_12 = psi[ss+vm]()(1)(2);
 | 
			
		||||
 | 
			
		||||
          if(vp <= v){
 | 
			
		||||
            hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
            hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
            hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
            hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
            hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
            hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(vm >= v){
 | 
			
		||||
            hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
            hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
            hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
            hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
            hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
            hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          // Can force these to real arithmetic and save 2x.
 | 
			
		||||
          Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
 | 
			
		||||
          Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
 | 
			
		||||
          Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
 | 
			
		||||
          Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
 | 
			
		||||
          Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
 | 
			
		||||
          Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
 | 
			
		||||
          Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
          Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
          Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
          Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
          Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
          Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
 | 
			
		||||
          vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      #endif
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
    std::vector<Coeff_t>& shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    #if 0
 | 
			
		||||
 | 
			
		||||
      this->M5D(psi, phi, chi, lower, diag, upper);
 | 
			
		||||
 | 
			
		||||
      // FIXME: possible gain from vectorizing shift operation as well?
 | 
			
		||||
      Coeff_t one(1.0);
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); }
 | 
			
		||||
        else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    #else
 | 
			
		||||
 | 
			
		||||
      GridBase* grid  = psi._grid;
 | 
			
		||||
      int Ls          = this->Ls;
 | 
			
		||||
      int LLs         = grid->_rdimensions[0];
 | 
			
		||||
      const int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
      Vector<iSinglet<Simd>> u(LLs);
 | 
			
		||||
      Vector<iSinglet<Simd>> l(LLs);
 | 
			
		||||
      Vector<iSinglet<Simd>> d(LLs);
 | 
			
		||||
      Vector<iSinglet<Simd>> s(LLs);
 | 
			
		||||
 | 
			
		||||
      assert(Ls/LLs == nsimd);
 | 
			
		||||
      assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
      chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
      // just directly address via type pun
 | 
			
		||||
      typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
      scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
      scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
      scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
      scalar_type* s_p = (scalar_type*) &s[0];
 | 
			
		||||
 | 
			
		||||
      for(int o=0; o<LLs; o++){ // outer
 | 
			
		||||
      for(int i=0; i<nsimd; i++){ //inner
 | 
			
		||||
        int s   = o + i*LLs;
 | 
			
		||||
        int ss  = o*nsimd + i;
 | 
			
		||||
        u_p[ss] = upper[s];
 | 
			
		||||
        l_p[ss] = lower[s];
 | 
			
		||||
        d_p[ss] = diag[s];
 | 
			
		||||
        s_p[ss] = shift_coeffs[s];
 | 
			
		||||
      }}
 | 
			
		||||
 | 
			
		||||
      this->M5Dcalls++;
 | 
			
		||||
      this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
      assert(Nc == 3);
 | 
			
		||||
 | 
			
		||||
      parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
        int vs     = (this->pm == 1) ? LLs-1 : 0;
 | 
			
		||||
        Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(2)(0) : psi[ss+vs]()(0)(0);
 | 
			
		||||
        Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(2)(1) : psi[ss+vs]()(0)(1);
 | 
			
		||||
        Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(2)(2) : psi[ss+vs]()(0)(2);
 | 
			
		||||
        Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(3)(0) : psi[ss+vs]()(1)(0);
 | 
			
		||||
        Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(3)(1) : psi[ss+vs]()(1)(1);
 | 
			
		||||
        Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(3)(2) : psi[ss+vs]()(1)(2);
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
          int vp = (v == LLs-1) ? 0     : v+1;
 | 
			
		||||
          int vm = (v == 0)     ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
          Simd hp_00 = psi[ss+vp]()(2)(0);
 | 
			
		||||
          Simd hp_01 = psi[ss+vp]()(2)(1);
 | 
			
		||||
          Simd hp_02 = psi[ss+vp]()(2)(2);
 | 
			
		||||
          Simd hp_10 = psi[ss+vp]()(3)(0);
 | 
			
		||||
          Simd hp_11 = psi[ss+vp]()(3)(1);
 | 
			
		||||
          Simd hp_12 = psi[ss+vp]()(3)(2);
 | 
			
		||||
 | 
			
		||||
          Simd hm_00 = psi[ss+vm]()(0)(0);
 | 
			
		||||
          Simd hm_01 = psi[ss+vm]()(0)(1);
 | 
			
		||||
          Simd hm_02 = psi[ss+vm]()(0)(2);
 | 
			
		||||
          Simd hm_10 = psi[ss+vm]()(1)(0);
 | 
			
		||||
          Simd hm_11 = psi[ss+vm]()(1)(1);
 | 
			
		||||
          Simd hm_12 = psi[ss+vm]()(1)(2);
 | 
			
		||||
 | 
			
		||||
          if(vp <= v){
 | 
			
		||||
            hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
            hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
            hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
            hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
            hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
            hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(this->pm == 1 && vs <= v){
 | 
			
		||||
            hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
 | 
			
		||||
            hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
 | 
			
		||||
            hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
 | 
			
		||||
            hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
 | 
			
		||||
            hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
 | 
			
		||||
            hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(vm >= v){
 | 
			
		||||
            hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
            hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
            hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
            hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
            hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
            hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(this->pm == -1 && vs >= v){
 | 
			
		||||
            hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
 | 
			
		||||
            hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
 | 
			
		||||
            hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
 | 
			
		||||
            hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
 | 
			
		||||
            hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
 | 
			
		||||
            hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          // Can force these to real arithmetic and save 2x.
 | 
			
		||||
          Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
 | 
			
		||||
          Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
 | 
			
		||||
          Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
 | 
			
		||||
          Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
 | 
			
		||||
          Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
 | 
			
		||||
          Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
 | 
			
		||||
          Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
          Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
          Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
          Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
          Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
          Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
 | 
			
		||||
          vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      this->M5Dtime += usecond();
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase* grid = psi._grid;
 | 
			
		||||
    int Ls  = this->Ls;
 | 
			
		||||
    int LLs = grid->_rdimensions[0];
 | 
			
		||||
    int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
    Vector<iSinglet<Simd>> u(LLs);
 | 
			
		||||
    Vector<iSinglet<Simd>> l(LLs);
 | 
			
		||||
    Vector<iSinglet<Simd>> d(LLs);
 | 
			
		||||
 | 
			
		||||
    assert(Ls/LLs == nsimd);
 | 
			
		||||
    assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    // just directly address via type pun
 | 
			
		||||
    typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
    scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
    scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
    scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
 | 
			
		||||
    for(int o=0; o<LLs; o++){ // outer
 | 
			
		||||
    for(int i=0; i<nsimd; i++){ //inner
 | 
			
		||||
      int s  = o + i*LLs;
 | 
			
		||||
      int ss = o*nsimd + i;
 | 
			
		||||
      u_p[ss] = upper[s];
 | 
			
		||||
      l_p[ss] = lower[s];
 | 
			
		||||
      d_p[ss] = diag[s];
 | 
			
		||||
    }}
 | 
			
		||||
 | 
			
		||||
    this->M5Dcalls++;
 | 
			
		||||
    this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
    parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
      #if 0
 | 
			
		||||
 | 
			
		||||
        alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
        alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
        alignas(64) SiteSpinor fp;
 | 
			
		||||
        alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          int vp = (v+1)%LLs;
 | 
			
		||||
          int vm = (v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
          spProj5p(hp, psi[ss+vp]);
 | 
			
		||||
          spProj5m(hm, psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
          if(vp <= v){ rotate(hp, hp, 1); }
 | 
			
		||||
          if(vm >= v){ rotate(hm, hm, nsimd-1); }
 | 
			
		||||
 | 
			
		||||
          hp = hp*0.5;
 | 
			
		||||
          hm = hm*0.5;
 | 
			
		||||
          spRecon5p(fp, hp);
 | 
			
		||||
          spRecon5m(fm, hm);
 | 
			
		||||
 | 
			
		||||
          chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
 | 
			
		||||
          chi[ss+v] = chi[ss+v]     +l[v]*fm;
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      #else
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
          int vp = (v == LLs-1) ? 0     : v+1;
 | 
			
		||||
          int vm = (v == 0    ) ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
          Simd hp_00 = psi[ss+vp]()(0)(0);
 | 
			
		||||
          Simd hp_01 = psi[ss+vp]()(0)(1);
 | 
			
		||||
          Simd hp_02 = psi[ss+vp]()(0)(2);
 | 
			
		||||
          Simd hp_10 = psi[ss+vp]()(1)(0);
 | 
			
		||||
          Simd hp_11 = psi[ss+vp]()(1)(1);
 | 
			
		||||
          Simd hp_12 = psi[ss+vp]()(1)(2);
 | 
			
		||||
 | 
			
		||||
          Simd hm_00 = psi[ss+vm]()(2)(0);
 | 
			
		||||
          Simd hm_01 = psi[ss+vm]()(2)(1);
 | 
			
		||||
          Simd hm_02 = psi[ss+vm]()(2)(2);
 | 
			
		||||
          Simd hm_10 = psi[ss+vm]()(3)(0);
 | 
			
		||||
          Simd hm_11 = psi[ss+vm]()(3)(1);
 | 
			
		||||
          Simd hm_12 = psi[ss+vm]()(3)(2);
 | 
			
		||||
 | 
			
		||||
          if (vp <= v){
 | 
			
		||||
            hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
            hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
            hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
            hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
            hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
            hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(vm >= v){
 | 
			
		||||
            hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
            hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
            hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
            hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
            hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
            hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          Simd p_00 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
          Simd p_01 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
          Simd p_02 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
          Simd p_10 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
          Simd p_11 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
          Simd p_12 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
          Simd p_20 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00);
 | 
			
		||||
          Simd p_21 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01);
 | 
			
		||||
          Simd p_22 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02);
 | 
			
		||||
          Simd p_30 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10);
 | 
			
		||||
          Simd p_31 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11);
 | 
			
		||||
          Simd p_32 = switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12);
 | 
			
		||||
 | 
			
		||||
          vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      #endif
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->M5Dtime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
 | 
			
		||||
    FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
 | 
			
		||||
    std::vector<Coeff_t>& shift_coeffs)
 | 
			
		||||
  {
 | 
			
		||||
    #if 0
 | 
			
		||||
 | 
			
		||||
      this->M5Ddag(psi, phi, chi, lower, diag, upper);
 | 
			
		||||
 | 
			
		||||
      // FIXME: possible gain from vectorizing shift operation as well?
 | 
			
		||||
      Coeff_t one(1.0);
 | 
			
		||||
      int Ls = this->Ls;
 | 
			
		||||
      for(int s=0; s<Ls; s++){
 | 
			
		||||
        if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); }
 | 
			
		||||
        else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    #else
 | 
			
		||||
 | 
			
		||||
      GridBase* grid = psi._grid;
 | 
			
		||||
      int Ls  = this->Ls;
 | 
			
		||||
      int LLs = grid->_rdimensions[0];
 | 
			
		||||
      int nsimd = Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
      Vector<iSinglet<Simd>> u(LLs);
 | 
			
		||||
      Vector<iSinglet<Simd>> l(LLs);
 | 
			
		||||
      Vector<iSinglet<Simd>> d(LLs);
 | 
			
		||||
      Vector<iSinglet<Simd>> s(LLs);
 | 
			
		||||
 | 
			
		||||
      assert(Ls/LLs == nsimd);
 | 
			
		||||
      assert(phi.checkerboard == psi.checkerboard);
 | 
			
		||||
 | 
			
		||||
      chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
      // just directly address via type pun
 | 
			
		||||
      typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
      scalar_type* u_p = (scalar_type*) &u[0];
 | 
			
		||||
      scalar_type* l_p = (scalar_type*) &l[0];
 | 
			
		||||
      scalar_type* d_p = (scalar_type*) &d[0];
 | 
			
		||||
      scalar_type* s_p = (scalar_type*) &s[0];
 | 
			
		||||
 | 
			
		||||
      for(int o=0; o<LLs; o++){ // outer
 | 
			
		||||
      for(int i=0; i<nsimd; i++){ //inner
 | 
			
		||||
        int s  = o + i*LLs;
 | 
			
		||||
        int ss = o*nsimd + i;
 | 
			
		||||
        u_p[ss] = upper[s];
 | 
			
		||||
        l_p[ss] = lower[s];
 | 
			
		||||
        d_p[ss] = diag[s];
 | 
			
		||||
        s_p[ss] = shift_coeffs[s];
 | 
			
		||||
      }}
 | 
			
		||||
 | 
			
		||||
      this->M5Dcalls++;
 | 
			
		||||
      this->M5Dtime -= usecond();
 | 
			
		||||
 | 
			
		||||
      parallel_for(int ss=0; ss<grid->oSites(); ss+=LLs){ // adds LLs
 | 
			
		||||
 | 
			
		||||
        int vs     = (this->pm == 1) ? LLs-1 : 0;
 | 
			
		||||
        Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(0)(0) : psi[ss+vs]()(2)(0);
 | 
			
		||||
        Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(0)(1) : psi[ss+vs]()(2)(1);
 | 
			
		||||
        Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(0)(2) : psi[ss+vs]()(2)(2);
 | 
			
		||||
        Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(1)(0) : psi[ss+vs]()(3)(0);
 | 
			
		||||
        Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(1)(1) : psi[ss+vs]()(3)(1);
 | 
			
		||||
        Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(1)(2) : psi[ss+vs]()(3)(2);
 | 
			
		||||
 | 
			
		||||
        for(int v=0; v<LLs; v++){
 | 
			
		||||
 | 
			
		||||
          vprefetch(psi[ss+v+LLs]);
 | 
			
		||||
 | 
			
		||||
          int vp = (v == LLs-1) ? 0     : v+1;
 | 
			
		||||
          int vm = (v == 0    ) ? LLs-1 : v-1;
 | 
			
		||||
 | 
			
		||||
          Simd hp_00 = psi[ss+vp]()(0)(0);
 | 
			
		||||
          Simd hp_01 = psi[ss+vp]()(0)(1);
 | 
			
		||||
          Simd hp_02 = psi[ss+vp]()(0)(2);
 | 
			
		||||
          Simd hp_10 = psi[ss+vp]()(1)(0);
 | 
			
		||||
          Simd hp_11 = psi[ss+vp]()(1)(1);
 | 
			
		||||
          Simd hp_12 = psi[ss+vp]()(1)(2);
 | 
			
		||||
 | 
			
		||||
          Simd hm_00 = psi[ss+vm]()(2)(0);
 | 
			
		||||
          Simd hm_01 = psi[ss+vm]()(2)(1);
 | 
			
		||||
          Simd hm_02 = psi[ss+vm]()(2)(2);
 | 
			
		||||
          Simd hm_10 = psi[ss+vm]()(3)(0);
 | 
			
		||||
          Simd hm_11 = psi[ss+vm]()(3)(1);
 | 
			
		||||
          Simd hm_12 = psi[ss+vm]()(3)(2);
 | 
			
		||||
 | 
			
		||||
          if (vp <= v){
 | 
			
		||||
            hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
            hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
            hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
            hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
            hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
            hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(this->pm == 1 && vs <= v){
 | 
			
		||||
            hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v);
 | 
			
		||||
            hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v);
 | 
			
		||||
            hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v);
 | 
			
		||||
            hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v);
 | 
			
		||||
            hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v);
 | 
			
		||||
            hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(vm >= v){
 | 
			
		||||
            hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
            hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
            hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
            hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
            hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
            hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          if(this->pm == -1 && vs >= v){
 | 
			
		||||
            hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v);
 | 
			
		||||
            hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v);
 | 
			
		||||
            hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v);
 | 
			
		||||
            hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v);
 | 
			
		||||
            hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v);
 | 
			
		||||
            hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v);
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          Simd p_00 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_00)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_00);
 | 
			
		||||
          Simd p_01 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_01)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_01);
 | 
			
		||||
          Simd p_02 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_02)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_02);
 | 
			
		||||
          Simd p_10 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_10)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_10);
 | 
			
		||||
          Simd p_11 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_11)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_11);
 | 
			
		||||
          Simd p_12 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_12)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo<Coeff_t>::mult(u[v]()()(), hp_12);
 | 
			
		||||
          Simd p_20 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_00)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_00);
 | 
			
		||||
          Simd p_21 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_01)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_01);
 | 
			
		||||
          Simd p_22 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_02)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_02);
 | 
			
		||||
          Simd p_30 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_10)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_10);
 | 
			
		||||
          Simd p_31 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_11)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_11);
 | 
			
		||||
          Simd p_32 = (this->pm == 1) ? switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
 | 
			
		||||
                                      : switcheroo<Coeff_t>::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo<Coeff_t>::mult(l[v]()()(), hm_12)
 | 
			
		||||
                                                                                                 + switcheroo<Coeff_t>::mult(s[v]()()(), hs_12);
 | 
			
		||||
 | 
			
		||||
          vstream(chi[ss+v]()(0)(0), p_00);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(1), p_01);
 | 
			
		||||
          vstream(chi[ss+v]()(0)(2), p_02);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(0), p_10);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(1), p_11);
 | 
			
		||||
          vstream(chi[ss+v]()(1)(2), p_12);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(0), p_20);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(1), p_21);
 | 
			
		||||
          vstream(chi[ss+v]()(2)(2), p_22);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(0), p_30);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(1), p_31);
 | 
			
		||||
          vstream(chi[ss+v]()(3)(2), p_32);
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      this->M5Dtime += usecond();
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef AVX512
 | 
			
		||||
    #include<simd/Intel512common.h>
 | 
			
		||||
    #include<simd/Intel512avx.h>
 | 
			
		||||
    #include<simd/Intel512single.h>
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
 | 
			
		||||
    int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
  {
 | 
			
		||||
    #ifndef AVX512
 | 
			
		||||
      {
 | 
			
		||||
        SiteHalfSpinor BcastP;
 | 
			
		||||
        SiteHalfSpinor BcastM;
 | 
			
		||||
        SiteHalfSpinor SiteChiP;
 | 
			
		||||
        SiteHalfSpinor SiteChiM;
 | 
			
		||||
 | 
			
		||||
        // Ls*Ls * 2 * 12 * vol flops
 | 
			
		||||
        for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
 | 
			
		||||
          for(int s2=0; s2<LLs; s2++){
 | 
			
		||||
          for(int l=0; l < Simd::Nsimd(); l++){ // simd lane
 | 
			
		||||
 | 
			
		||||
            int s = s2 + l*LLs;
 | 
			
		||||
            int lex = s2 + LLs*site;
 | 
			
		||||
 | 
			
		||||
            if( s2==0 && l==0 ){
 | 
			
		||||
              SiteChiP=zero;
 | 
			
		||||
              SiteChiM=zero;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            for(int sp=0; sp<2;  sp++){
 | 
			
		||||
            for(int co=0; co<Nc; co++){
 | 
			
		||||
              vbroadcast(BcastP()(sp)(co), psi[lex]()(sp)(co), l);
 | 
			
		||||
            }}
 | 
			
		||||
 | 
			
		||||
            for(int sp=0; sp<2;  sp++){
 | 
			
		||||
            for(int co=0; co<Nc; co++){
 | 
			
		||||
              vbroadcast(BcastM()(sp)(co), psi[lex]()(sp+2)(co), l);
 | 
			
		||||
            }}
 | 
			
		||||
 | 
			
		||||
            for(int sp=0; sp<2;  sp++){
 | 
			
		||||
            for(int co=0; co<Nc; co++){
 | 
			
		||||
              SiteChiP()(sp)(co) = real_madd(Matp[LLs*s+s1]()()(), BcastP()(sp)(co), SiteChiP()(sp)(co)); // 1100 us.
 | 
			
		||||
              SiteChiM()(sp)(co) = real_madd(Matm[LLs*s+s1]()()(), BcastM()(sp)(co), SiteChiM()(sp)(co)); // each found by commenting out
 | 
			
		||||
            }}
 | 
			
		||||
          }}
 | 
			
		||||
 | 
			
		||||
          {
 | 
			
		||||
            int lex = s1 + LLs*site;
 | 
			
		||||
            for(int sp=0; sp<2;  sp++){
 | 
			
		||||
            for(int co=0; co<Nc; co++){
 | 
			
		||||
              vstream(chi[lex]()(sp)(co),   SiteChiP()(sp)(co));
 | 
			
		||||
              vstream(chi[lex]()(sp+2)(co), SiteChiM()(sp)(co));
 | 
			
		||||
            }}
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    #else
 | 
			
		||||
      {
 | 
			
		||||
        // pointers
 | 
			
		||||
        //  MASK_REGS;
 | 
			
		||||
        #define Chi_00 %%zmm1
 | 
			
		||||
        #define Chi_01 %%zmm2
 | 
			
		||||
        #define Chi_02 %%zmm3
 | 
			
		||||
        #define Chi_10 %%zmm4
 | 
			
		||||
        #define Chi_11 %%zmm5
 | 
			
		||||
        #define Chi_12 %%zmm6
 | 
			
		||||
        #define Chi_20 %%zmm7
 | 
			
		||||
        #define Chi_21 %%zmm8
 | 
			
		||||
        #define Chi_22 %%zmm9
 | 
			
		||||
        #define Chi_30 %%zmm10
 | 
			
		||||
        #define Chi_31 %%zmm11
 | 
			
		||||
        #define Chi_32 %%zmm12
 | 
			
		||||
 | 
			
		||||
        #define BCAST0  %%zmm13
 | 
			
		||||
        #define BCAST1  %%zmm14
 | 
			
		||||
        #define BCAST2  %%zmm15
 | 
			
		||||
        #define BCAST3  %%zmm16
 | 
			
		||||
        #define BCAST4  %%zmm17
 | 
			
		||||
        #define BCAST5  %%zmm18
 | 
			
		||||
        #define BCAST6  %%zmm19
 | 
			
		||||
        #define BCAST7  %%zmm20
 | 
			
		||||
        #define BCAST8  %%zmm21
 | 
			
		||||
        #define BCAST9  %%zmm22
 | 
			
		||||
        #define BCAST10 %%zmm23
 | 
			
		||||
        #define BCAST11 %%zmm24
 | 
			
		||||
 | 
			
		||||
        int incr = LLs*LLs*sizeof(iSinglet<Simd>);
 | 
			
		||||
 | 
			
		||||
        for(int s1=0; s1<LLs; s1++){
 | 
			
		||||
 | 
			
		||||
          for(int s2=0; s2<LLs; s2++){
 | 
			
		||||
 | 
			
		||||
            int lex = s2 + LLs*site;
 | 
			
		||||
            uint64_t a0 = (uint64_t) &Matp[LLs*s2+s1]; // should be cacheable
 | 
			
		||||
            uint64_t a1 = (uint64_t) &Matm[LLs*s2+s1];
 | 
			
		||||
            uint64_t a2 = (uint64_t) &psi[lex];
 | 
			
		||||
 | 
			
		||||
            for(int l=0; l<Simd::Nsimd(); l++){ // simd lane
 | 
			
		||||
 | 
			
		||||
              if((s2+l)==0) {
 | 
			
		||||
                asm(
 | 
			
		||||
                      VPREFETCH1(0,%2)              VPREFETCH1(0,%1)
 | 
			
		||||
                      VPREFETCH1(12,%2)  	          VPREFETCH1(13,%2)
 | 
			
		||||
                      VPREFETCH1(14,%2)  	          VPREFETCH1(15,%2)
 | 
			
		||||
                      VBCASTCDUP(0,%2,BCAST0)
 | 
			
		||||
                      VBCASTCDUP(1,%2,BCAST1)
 | 
			
		||||
                      VBCASTCDUP(2,%2,BCAST2)
 | 
			
		||||
                      VBCASTCDUP(3,%2,BCAST3)
 | 
			
		||||
                      VBCASTCDUP(4,%2,BCAST4)       VMULMEM(0,%0,BCAST0,Chi_00)
 | 
			
		||||
                      VBCASTCDUP(5,%2,BCAST5)       VMULMEM(0,%0,BCAST1,Chi_01)
 | 
			
		||||
                      VBCASTCDUP(6,%2,BCAST6)       VMULMEM(0,%0,BCAST2,Chi_02)
 | 
			
		||||
                      VBCASTCDUP(7,%2,BCAST7)       VMULMEM(0,%0,BCAST3,Chi_10)
 | 
			
		||||
                      VBCASTCDUP(8,%2,BCAST8)       VMULMEM(0,%0,BCAST4,Chi_11)
 | 
			
		||||
                      VBCASTCDUP(9,%2,BCAST9)       VMULMEM(0,%0,BCAST5,Chi_12)
 | 
			
		||||
                      VBCASTCDUP(10,%2,BCAST10)     VMULMEM(0,%1,BCAST6,Chi_20)
 | 
			
		||||
                      VBCASTCDUP(11,%2,BCAST11)     VMULMEM(0,%1,BCAST7,Chi_21)
 | 
			
		||||
                      VMULMEM(0,%1,BCAST8,Chi_22)
 | 
			
		||||
                      VMULMEM(0,%1,BCAST9,Chi_30)
 | 
			
		||||
                      VMULMEM(0,%1,BCAST10,Chi_31)
 | 
			
		||||
                      VMULMEM(0,%1,BCAST11,Chi_32)
 | 
			
		||||
                      : : "r" (a0), "r" (a1), "r" (a2)                            );
 | 
			
		||||
              } else {
 | 
			
		||||
                asm(
 | 
			
		||||
                      VBCASTCDUP(0,%2,BCAST0)   VMADDMEM(0,%0,BCAST0,Chi_00)
 | 
			
		||||
                      VBCASTCDUP(1,%2,BCAST1)   VMADDMEM(0,%0,BCAST1,Chi_01)
 | 
			
		||||
                      VBCASTCDUP(2,%2,BCAST2)   VMADDMEM(0,%0,BCAST2,Chi_02)
 | 
			
		||||
                      VBCASTCDUP(3,%2,BCAST3)   VMADDMEM(0,%0,BCAST3,Chi_10)
 | 
			
		||||
                      VBCASTCDUP(4,%2,BCAST4)   VMADDMEM(0,%0,BCAST4,Chi_11)
 | 
			
		||||
                      VBCASTCDUP(5,%2,BCAST5)   VMADDMEM(0,%0,BCAST5,Chi_12)
 | 
			
		||||
                      VBCASTCDUP(6,%2,BCAST6)   VMADDMEM(0,%1,BCAST6,Chi_20)
 | 
			
		||||
                      VBCASTCDUP(7,%2,BCAST7)   VMADDMEM(0,%1,BCAST7,Chi_21)
 | 
			
		||||
                      VBCASTCDUP(8,%2,BCAST8)   VMADDMEM(0,%1,BCAST8,Chi_22)
 | 
			
		||||
                      VBCASTCDUP(9,%2,BCAST9)   VMADDMEM(0,%1,BCAST9,Chi_30)
 | 
			
		||||
                      VBCASTCDUP(10,%2,BCAST10) VMADDMEM(0,%1,BCAST10,Chi_31)
 | 
			
		||||
                      VBCASTCDUP(11,%2,BCAST11) VMADDMEM(0,%1,BCAST11,Chi_32)
 | 
			
		||||
                      : : "r" (a0), "r" (a1), "r" (a2)                            );
 | 
			
		||||
              }
 | 
			
		||||
 | 
			
		||||
              a0 = a0 + incr;
 | 
			
		||||
              a1 = a1 + incr;
 | 
			
		||||
              a2 = a2 + sizeof(typename Simd::scalar_type);
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          {
 | 
			
		||||
            int lexa = s1+LLs*site;
 | 
			
		||||
            asm (
 | 
			
		||||
               VSTORE(0,%0,Chi_00) VSTORE(1 ,%0,Chi_01)  VSTORE(2 ,%0,Chi_02)
 | 
			
		||||
               VSTORE(3,%0,Chi_10) VSTORE(4 ,%0,Chi_11)  VSTORE(5 ,%0,Chi_12)
 | 
			
		||||
               VSTORE(6,%0,Chi_20) VSTORE(7 ,%0,Chi_21)  VSTORE(8 ,%0,Chi_22)
 | 
			
		||||
               VSTORE(9,%0,Chi_30) VSTORE(10,%0,Chi_31)  VSTORE(11,%0,Chi_32)
 | 
			
		||||
               : : "r" ((uint64_t)&chi[lexa]) : "memory" );
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      #undef Chi_00
 | 
			
		||||
      #undef Chi_01
 | 
			
		||||
      #undef Chi_02
 | 
			
		||||
      #undef Chi_10
 | 
			
		||||
      #undef Chi_11
 | 
			
		||||
      #undef Chi_12
 | 
			
		||||
      #undef Chi_20
 | 
			
		||||
      #undef Chi_21
 | 
			
		||||
      #undef Chi_22
 | 
			
		||||
      #undef Chi_30
 | 
			
		||||
      #undef Chi_31
 | 
			
		||||
      #undef Chi_32
 | 
			
		||||
 | 
			
		||||
      #undef BCAST0
 | 
			
		||||
      #undef BCAST1
 | 
			
		||||
      #undef BCAST2
 | 
			
		||||
      #undef BCAST3
 | 
			
		||||
      #undef BCAST4
 | 
			
		||||
      #undef BCAST5
 | 
			
		||||
      #undef BCAST6
 | 
			
		||||
      #undef BCAST7
 | 
			
		||||
      #undef BCAST8
 | 
			
		||||
      #undef BCAST9
 | 
			
		||||
      #undef BCAST10
 | 
			
		||||
      #undef BCAST11
 | 
			
		||||
 | 
			
		||||
    #endif
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Z-mobius version
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, FermionField& chi,
 | 
			
		||||
    int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
 | 
			
		||||
  {
 | 
			
		||||
    std::cout << "Error: zMobius not implemented for EOFA" << std::endl;
 | 
			
		||||
    exit(-1);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
 | 
			
		||||
  {
 | 
			
		||||
    int Ls  = this->Ls;
 | 
			
		||||
    int LLs = psi._grid->_rdimensions[0];
 | 
			
		||||
    int vol = psi._grid->oSites()/LLs;
 | 
			
		||||
 | 
			
		||||
    chi.checkerboard = psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
    Vector<iSinglet<Simd>>   Matp;
 | 
			
		||||
    Vector<iSinglet<Simd>>   Matm;
 | 
			
		||||
    Vector<iSinglet<Simd>>* _Matp;
 | 
			
		||||
    Vector<iSinglet<Simd>>* _Matm;
 | 
			
		||||
 | 
			
		||||
    //  MooeeInternalCompute(dag,inv,Matp,Matm);
 | 
			
		||||
    if(inv && dag){
 | 
			
		||||
      _Matp = &this->MatpInvDag;
 | 
			
		||||
      _Matm = &this->MatmInvDag;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(inv && (!dag)){
 | 
			
		||||
      _Matp = &this->MatpInv;
 | 
			
		||||
      _Matm = &this->MatmInv;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(!inv){
 | 
			
		||||
      MooeeInternalCompute(dag, inv, Matp, Matm);
 | 
			
		||||
      _Matp = &Matp;
 | 
			
		||||
      _Matm = &Matm;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    assert(_Matp->size() == Ls*LLs);
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvCalls++;
 | 
			
		||||
    this->MooeeInvTime -= usecond();
 | 
			
		||||
 | 
			
		||||
    if(switcheroo<Coeff_t>::iscomplex()){
 | 
			
		||||
      parallel_for(auto site=0; site<vol; site++){
 | 
			
		||||
        MooeeInternalZAsm(psi, chi, LLs, site, *_Matp, *_Matm);
 | 
			
		||||
      }
 | 
			
		||||
    } else {
 | 
			
		||||
      parallel_for(auto site=0; site<vol; site++){
 | 
			
		||||
        MooeeInternalAsm(psi, chi, LLs, site, *_Matp, *_Matm);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->MooeeInvTime += usecond();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef MOBIUS_EOFA_DPERP_VEC
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF);
 | 
			
		||||
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF);
 | 
			
		||||
    INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH);
 | 
			
		||||
 | 
			
		||||
    template void MobiusEOFAFermion<DomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<DomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
    template void MobiusEOFAFermion<DomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<DomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
    template void MobiusEOFAFermion<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,294 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi, Peter Boyle
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric;
 | 
			
		||||
int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute;
 | 
			
		||||
 | 
			
		||||
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink)		\
 | 
			
		||||
  SE = st.GetEntry(ptype, Dir+skew, sF);			\
 | 
			
		||||
  if (SE->_is_local ) {						\
 | 
			
		||||
    if (SE->_permute) {						\
 | 
			
		||||
      chi_p = χ						\
 | 
			
		||||
      permute(chi,  in._odata[SE->_offset], ptype);		\
 | 
			
		||||
    } else {							\
 | 
			
		||||
      chi_p = &in._odata[SE->_offset];				\
 | 
			
		||||
    }								\
 | 
			
		||||
  } else {							\
 | 
			
		||||
    chi_p = &buf[SE->_offset];					\
 | 
			
		||||
  }								\
 | 
			
		||||
  multLink(Uchi, U._odata[sU], *chi_p, Dir);			
 | 
			
		||||
 | 
			
		||||
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink)		\
 | 
			
		||||
  SE = st.GetEntry(ptype, Dir+skew, sF);			\
 | 
			
		||||
  if (SE->_is_local ) {						\
 | 
			
		||||
    if (SE->_permute) {						\
 | 
			
		||||
      chi_p = χ						\
 | 
			
		||||
      permute(chi,  in._odata[SE->_offset], ptype);		\
 | 
			
		||||
    } else {							\
 | 
			
		||||
      chi_p = &in._odata[SE->_offset];				\
 | 
			
		||||
    }								\
 | 
			
		||||
  } else if ( st.same_node[Dir] ) {				\
 | 
			
		||||
    chi_p = &buf[SE->_offset];					\
 | 
			
		||||
  }								\
 | 
			
		||||
  if (SE->_is_local || st.same_node[Dir] ) {			\
 | 
			
		||||
    multLink(Uchi, U._odata[sU], *chi_p, Dir);			\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink)		\
 | 
			
		||||
  SE = st.GetEntry(ptype, Dir+skew, sF);			\
 | 
			
		||||
  if ((!SE->_is_local) && (!st.same_node[Dir]) ) {		\
 | 
			
		||||
    nmu++;							\
 | 
			
		||||
    chi_p = &buf[SE->_offset];					\
 | 
			
		||||
    multLink(Uchi, U._odata[sU], *chi_p, Dir);			\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Generic implementation; move to different file?
 | 
			
		||||
// Int, Ext, Int+Ext cases for comms overlap
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
					     DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
					     SiteSpinor *buf, int LLs, int sU, 
 | 
			
		||||
					     const FermionField &in, FermionField &out, int dag) {
 | 
			
		||||
  const SiteSpinor *chi_p;
 | 
			
		||||
  SiteSpinor chi;
 | 
			
		||||
  SiteSpinor Uchi;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int ptype;
 | 
			
		||||
  int skew;
 | 
			
		||||
 | 
			
		||||
  for(int s=0;s<LLs;s++){
 | 
			
		||||
    int sF=LLs*sU+s;
 | 
			
		||||
    skew = 0;
 | 
			
		||||
    GENERIC_STENCIL_LEG(U,Xp,skew,Impl::multLink);
 | 
			
		||||
    GENERIC_STENCIL_LEG(U,Yp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(U,Zp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(U,Tp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(U,Xm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd);
 | 
			
		||||
    skew=8;
 | 
			
		||||
    GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(UUU,Zp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(UUU,Tp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(UUU,Xm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd);
 | 
			
		||||
    if ( dag ) { 
 | 
			
		||||
      Uchi = - Uchi;
 | 
			
		||||
    } 
 | 
			
		||||
    vstream(out._odata[sF], Uchi);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  // Only contributions from interior of our node
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
						DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
						SiteSpinor *buf, int LLs, int sU, 
 | 
			
		||||
						const FermionField &in, FermionField &out,int dag) {
 | 
			
		||||
  const SiteSpinor *chi_p;
 | 
			
		||||
  SiteSpinor chi;
 | 
			
		||||
  SiteSpinor Uchi;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int ptype;
 | 
			
		||||
  int skew ;
 | 
			
		||||
 | 
			
		||||
  for(int s=0;s<LLs;s++){
 | 
			
		||||
    int sF=LLs*sU+s;
 | 
			
		||||
    skew = 0;
 | 
			
		||||
    Uchi=zero;
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(U,Yp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(U,Zp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(U,Tp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(U,Xm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd);
 | 
			
		||||
    skew=8;
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(UUU,Zp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(UUU,Tp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(UUU,Xm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd);
 | 
			
		||||
    if ( dag ) {
 | 
			
		||||
      Uchi = - Uchi;
 | 
			
		||||
    }
 | 
			
		||||
    vstream(out._odata[sF], Uchi);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
  // Only contributions from exterior of our node
 | 
			
		||||
  ///////////////////////////////////////////////////
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
						DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
						SiteSpinor *buf, int LLs, int sU,
 | 
			
		||||
						const FermionField &in, FermionField &out,int dag) {
 | 
			
		||||
  const SiteSpinor *chi_p;
 | 
			
		||||
  SiteSpinor chi;
 | 
			
		||||
  SiteSpinor Uchi;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int ptype;
 | 
			
		||||
  int nmu=0;
 | 
			
		||||
  int skew ;
 | 
			
		||||
 | 
			
		||||
  for(int s=0;s<LLs;s++){
 | 
			
		||||
    int sF=LLs*sU+s;
 | 
			
		||||
    skew = 0;
 | 
			
		||||
    Uchi=zero;
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(U,Yp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(U,Zp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(U,Tp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(U,Xm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd);
 | 
			
		||||
    skew=8;
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Zp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Tp,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Xm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
 | 
			
		||||
    GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
 | 
			
		||||
 | 
			
		||||
    if ( nmu ) { 
 | 
			
		||||
      if ( dag ) { 
 | 
			
		||||
	out._odata[sF] = out._odata[sF] - Uchi;
 | 
			
		||||
      } else { 
 | 
			
		||||
	out._odata[sF] = out._odata[sF] + Uchi;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Driving / wrapping routine to select right kernel
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
					 SiteSpinor *buf, int LLs, int sU,
 | 
			
		||||
					 const FermionField &in, FermionField &out,
 | 
			
		||||
					 int interior,int exterior)
 | 
			
		||||
{
 | 
			
		||||
  int dag=1;
 | 
			
		||||
  DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
				      SiteSpinor *buf, int LLs, int sU,
 | 
			
		||||
				      const FermionField &in, FermionField &out,
 | 
			
		||||
				      int interior,int exterior)
 | 
			
		||||
{
 | 
			
		||||
  int dag=0;
 | 
			
		||||
  DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
				      SiteSpinor *buf, int LLs,
 | 
			
		||||
				      int sU, const FermionField &in, FermionField &out,
 | 
			
		||||
				      int dag,int interior,int exterior) 
 | 
			
		||||
{
 | 
			
		||||
  switch(Opt) {
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
  case OptInlineAsm:
 | 
			
		||||
    if ( interior && exterior ) {
 | 
			
		||||
      DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
 | 
			
		||||
    } else { 
 | 
			
		||||
      std::cout << GridLogError << "Cannot overlap comms and compute with Staggered assembly"<<std::endl;
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
    break;
 | 
			
		||||
#endif
 | 
			
		||||
  case OptHandUnroll:
 | 
			
		||||
    if ( interior && exterior ) {
 | 
			
		||||
      DhopSiteHand   (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
 | 
			
		||||
    } else if ( interior ) {
 | 
			
		||||
      DhopSiteHandInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
 | 
			
		||||
    } else if ( exterior ) {
 | 
			
		||||
      DhopSiteHandExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
 | 
			
		||||
    }
 | 
			
		||||
    break;
 | 
			
		||||
  case OptGeneric:
 | 
			
		||||
    if ( interior && exterior ) {
 | 
			
		||||
      DhopSiteGeneric   (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
 | 
			
		||||
    } else if ( interior ) {
 | 
			
		||||
      DhopSiteGenericInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
 | 
			
		||||
    } else if ( exterior ) {
 | 
			
		||||
      DhopSiteGenericExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
 | 
			
		||||
    }
 | 
			
		||||
    break;
 | 
			
		||||
  default:
 | 
			
		||||
    std::cout<<"Oops Opt = "<<Opt<<std::endl;
 | 
			
		||||
    assert(0);
 | 
			
		||||
    break;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeField &U,  DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
 | 
			
		||||
				      int sU, const FermionField &in, FermionField &out, int dir, int disp) 
 | 
			
		||||
{
 | 
			
		||||
  // Disp should be either +1,-1,+3,-3
 | 
			
		||||
  // What about "dag" ?
 | 
			
		||||
  // Because we work out pU . dS/dU 
 | 
			
		||||
  // U
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
FermOpStaggeredTemplateInstantiate(StaggeredKernels);
 | 
			
		||||
FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels);
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
@@ -1,122 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/fermion/StaggeredKernels.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi, Peter Boyle
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_QCD_STAGGERED_KERNELS_H
 | 
			
		||||
#define GRID_QCD_STAGGERED_KERNELS_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Helper routines that implement Staggered stencil for a single site.
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
class StaggeredKernelsStatic { 
 | 
			
		||||
 public:
 | 
			
		||||
  enum { OptGeneric, OptHandUnroll, OptInlineAsm };
 | 
			
		||||
  enum { CommsAndCompute, CommsThenCompute };
 | 
			
		||||
  static int Opt;
 | 
			
		||||
  static int Comms;
 | 
			
		||||
};
 | 
			
		||||
 
 | 
			
		||||
template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , public StaggeredKernelsStatic { 
 | 
			
		||||
 public:
 | 
			
		||||
   
 | 
			
		||||
  INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
  typedef FermionOperator<Impl> Base;
 | 
			
		||||
   
 | 
			
		||||
public:
 | 
			
		||||
    
 | 
			
		||||
   void DhopDir(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
 | 
			
		||||
		      int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
 | 
			
		||||
 | 
			
		||||
   ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
   // Generic Nc kernels
 | 
			
		||||
   ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
   void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
			DoubledGaugeField &U, DoubledGaugeField &UUU, 
 | 
			
		||||
			SiteSpinor * buf, int LLs, int sU, 
 | 
			
		||||
			const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
   void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
			   DoubledGaugeField &U, DoubledGaugeField &UUU, 
 | 
			
		||||
			   SiteSpinor * buf, int LLs, int sU, 
 | 
			
		||||
			   const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
   void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
			   DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
			   SiteSpinor * buf, int LLs, int sU, 
 | 
			
		||||
			   const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
 | 
			
		||||
   ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
   // Nc=3 specific kernels
 | 
			
		||||
   ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
   void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
		     DoubledGaugeField &U,DoubledGaugeField &UUU, 
 | 
			
		||||
		     SiteSpinor * buf, int LLs, int sU, 
 | 
			
		||||
		     const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
   void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
			DoubledGaugeField &U,DoubledGaugeField &UUU, 
 | 
			
		||||
			SiteSpinor * buf, int LLs, int sU, 
 | 
			
		||||
			const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
   void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
			DoubledGaugeField &U,DoubledGaugeField &UUU, 
 | 
			
		||||
			SiteSpinor * buf, int LLs, int sU, 
 | 
			
		||||
			const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
 | 
			
		||||
   ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
   // Asm Nc=3 specific kernels
 | 
			
		||||
   ///////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
   void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
		    DoubledGaugeField &U,DoubledGaugeField &UUU, 
 | 
			
		||||
		    SiteSpinor * buf, int LLs, int sU, 
 | 
			
		||||
		    const FermionField &in, FermionField &out,int dag);
 | 
			
		||||
   ///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
   // Generic interface; fan out to right routine
 | 
			
		||||
   ///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
   void DhopSite(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
		 DoubledGaugeField &U, DoubledGaugeField &UUU, 
 | 
			
		||||
		 SiteSpinor * buf, int LLs, int sU,
 | 
			
		||||
		 const FermionField &in, FermionField &out, int interior=1,int exterior=1);
 | 
			
		||||
 | 
			
		||||
   void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
		    DoubledGaugeField &U, DoubledGaugeField &UUU, 
 | 
			
		||||
		    SiteSpinor * buf, int LLs, int sU,
 | 
			
		||||
		    const FermionField &in, FermionField &out, int interior=1,int exterior=1);
 | 
			
		||||
 | 
			
		||||
   void DhopSite(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
		 DoubledGaugeField &U, DoubledGaugeField &UUU, 
 | 
			
		||||
		 SiteSpinor * buf, int LLs, int sU,
 | 
			
		||||
		 const FermionField &in, FermionField &out, int dag, int interior,int exterior);
 | 
			
		||||
  
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  StaggeredKernels(const ImplParams &p = ImplParams());
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
    
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,399 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/StaggerdKernelsHand.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI(b)		\
 | 
			
		||||
  const SiteSpinor & ref (b[offset]);	\
 | 
			
		||||
    Chi_0=ref()()(0);\
 | 
			
		||||
    Chi_1=ref()()(1);\
 | 
			
		||||
    Chi_2=ref()()(2);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// To splat or not to splat depends on the implementation
 | 
			
		||||
#define MULT(A,UChi)				\
 | 
			
		||||
  auto & ref(U._odata[sU](A));			\
 | 
			
		||||
   Impl::loadLinkElement(U_00,ref()(0,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_10,ref()(1,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_20,ref()(2,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_01,ref()(0,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_11,ref()(1,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_21,ref()(2,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_02,ref()(0,2));     \
 | 
			
		||||
   Impl::loadLinkElement(U_12,ref()(1,2));     \
 | 
			
		||||
   Impl::loadLinkElement(U_22,ref()(2,2));     \
 | 
			
		||||
    UChi ## _0  = U_00*Chi_0;	       \
 | 
			
		||||
    UChi ## _1  = U_10*Chi_0;\
 | 
			
		||||
    UChi ## _2  = U_20*Chi_0;\
 | 
			
		||||
    UChi ## _0 += U_01*Chi_1;\
 | 
			
		||||
    UChi ## _1 += U_11*Chi_1;\
 | 
			
		||||
    UChi ## _2 += U_21*Chi_1;\
 | 
			
		||||
    UChi ## _0 += U_02*Chi_2;\
 | 
			
		||||
    UChi ## _1 += U_12*Chi_2;\
 | 
			
		||||
    UChi ## _2 += U_22*Chi_2;
 | 
			
		||||
 | 
			
		||||
#define MULT_ADD(U,A,UChi)			\
 | 
			
		||||
  auto & ref(U._odata[sU](A));			\
 | 
			
		||||
   Impl::loadLinkElement(U_00,ref()(0,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_10,ref()(1,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_20,ref()(2,0));      \
 | 
			
		||||
   Impl::loadLinkElement(U_01,ref()(0,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_11,ref()(1,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_21,ref()(2,1));      \
 | 
			
		||||
   Impl::loadLinkElement(U_02,ref()(0,2));     \
 | 
			
		||||
   Impl::loadLinkElement(U_12,ref()(1,2));     \
 | 
			
		||||
   Impl::loadLinkElement(U_22,ref()(2,2));     \
 | 
			
		||||
    UChi ## _0 += U_00*Chi_0;	       \
 | 
			
		||||
    UChi ## _1 += U_10*Chi_0;\
 | 
			
		||||
    UChi ## _2 += U_20*Chi_0;\
 | 
			
		||||
    UChi ## _0 += U_01*Chi_1;\
 | 
			
		||||
    UChi ## _1 += U_11*Chi_1;\
 | 
			
		||||
    UChi ## _2 += U_21*Chi_1;\
 | 
			
		||||
    UChi ## _0 += U_02*Chi_2;\
 | 
			
		||||
    UChi ## _1 += U_12*Chi_2;\
 | 
			
		||||
    UChi ## _2 += U_22*Chi_2;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define PERMUTE_DIR(dir)			\
 | 
			
		||||
  permute##dir(Chi_0,Chi_0);			\
 | 
			
		||||
  permute##dir(Chi_1,Chi_1);			\
 | 
			
		||||
  permute##dir(Chi_2,Chi_2);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew)	\
 | 
			
		||||
  SE=st.GetEntry(ptype,Dir+skew,sF);	\
 | 
			
		||||
  offset = SE->_offset;			\
 | 
			
		||||
  local  = SE->_is_local;		\
 | 
			
		||||
  perm   = SE->_permute;		\
 | 
			
		||||
  if ( local ) {						\
 | 
			
		||||
    LOAD_CHI(in._odata);					\
 | 
			
		||||
    if ( perm) {						\
 | 
			
		||||
      PERMUTE_DIR(Perm);					\
 | 
			
		||||
    }								\
 | 
			
		||||
  } else {							\
 | 
			
		||||
    LOAD_CHI(buf);						\
 | 
			
		||||
  }								
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even)		\
 | 
			
		||||
  HAND_STENCIL_LEG_BASE(Dir,Perm,skew)				\
 | 
			
		||||
  {								\
 | 
			
		||||
    MULT(Dir,even);						\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG(U,Dir,Perm,skew,even)			\
 | 
			
		||||
  HAND_STENCIL_LEG_BASE(Dir,Perm,skew)				\
 | 
			
		||||
  {								\
 | 
			
		||||
    MULT_ADD(U,Dir,even);					\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even)	\
 | 
			
		||||
  SE=st.GetEntry(ptype,Dir+skew,sF);			\
 | 
			
		||||
  offset = SE->_offset;					\
 | 
			
		||||
  local  = SE->_is_local;				\
 | 
			
		||||
  perm   = SE->_permute;				\
 | 
			
		||||
  if ( local ) {					\
 | 
			
		||||
    LOAD_CHI(in._odata);				\
 | 
			
		||||
    if ( perm) {					\
 | 
			
		||||
      PERMUTE_DIR(Perm);				\
 | 
			
		||||
    }							\
 | 
			
		||||
  } else if ( st.same_node[Dir] ) {			\
 | 
			
		||||
    LOAD_CHI(buf);					\
 | 
			
		||||
  }							\
 | 
			
		||||
  if (SE->_is_local || st.same_node[Dir] ) {		\
 | 
			
		||||
    MULT_ADD(U,Dir,even);				\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_EXT(U,Dir,Perm,skew,even)	\
 | 
			
		||||
  SE=st.GetEntry(ptype,Dir+skew,sF);			\
 | 
			
		||||
  offset = SE->_offset;					\
 | 
			
		||||
  local  = SE->_is_local;				\
 | 
			
		||||
  perm   = SE->_permute;				\
 | 
			
		||||
  if ((!SE->_is_local) && (!st.same_node[Dir]) ) {		\
 | 
			
		||||
    nmu++;							\
 | 
			
		||||
    { LOAD_CHI(buf);	  }					\
 | 
			
		||||
    { MULT_ADD(U,Dir,even); }					\
 | 
			
		||||
  }								
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
					  DoubledGaugeField &U,DoubledGaugeField &UUU,
 | 
			
		||||
					  SiteSpinor *buf, int LLs, int sU, 
 | 
			
		||||
					  const FermionField &in, FermionField &out,int dag) 
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  Simd even_0; // 12 regs on knc
 | 
			
		||||
  Simd even_1;
 | 
			
		||||
  Simd even_2;
 | 
			
		||||
  Simd odd_0; // 12 regs on knc
 | 
			
		||||
  Simd odd_1;
 | 
			
		||||
  Simd odd_2;
 | 
			
		||||
 | 
			
		||||
  Simd Chi_0;    // two spinor; 6 regs
 | 
			
		||||
  Simd Chi_1;
 | 
			
		||||
  Simd Chi_2;
 | 
			
		||||
  
 | 
			
		||||
  Simd U_00;  // two rows of U matrix
 | 
			
		||||
  Simd U_10;
 | 
			
		||||
  Simd U_20;  
 | 
			
		||||
  Simd U_01;
 | 
			
		||||
  Simd U_11;
 | 
			
		||||
  Simd U_21;  // 2 reg left.
 | 
			
		||||
  Simd U_02;
 | 
			
		||||
  Simd U_12;
 | 
			
		||||
  Simd U_22; 
 | 
			
		||||
 | 
			
		||||
  SiteSpinor result;
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int skew;
 | 
			
		||||
 | 
			
		||||
  for(int s=0;s<LLs;s++){
 | 
			
		||||
    int sF=s+LLs*sU;
 | 
			
		||||
 | 
			
		||||
    skew = 0;
 | 
			
		||||
    HAND_STENCIL_LEG_BEGIN(Xp,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_BEGIN(Yp,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG      (U,Zp,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG      (U,Tp,0,skew,odd);  
 | 
			
		||||
    HAND_STENCIL_LEG      (U,Xm,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG      (U,Ym,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG      (U,Zm,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG      (U,Tm,0,skew,odd);  
 | 
			
		||||
    skew = 8;
 | 
			
		||||
    HAND_STENCIL_LEG(UUU,Xp,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG(UUU,Yp,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG(UUU,Zp,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG(UUU,Tp,0,skew,odd);  
 | 
			
		||||
    HAND_STENCIL_LEG(UUU,Xm,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG(UUU,Ym,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG(UUU,Zm,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG(UUU,Tm,0,skew,odd);  
 | 
			
		||||
    
 | 
			
		||||
    if ( dag ) {
 | 
			
		||||
      result()()(0) = - even_0 - odd_0;
 | 
			
		||||
      result()()(1) = - even_1 - odd_1;
 | 
			
		||||
      result()()(2) = - even_2 - odd_2;
 | 
			
		||||
    } else { 
 | 
			
		||||
      result()()(0) = even_0 + odd_0;
 | 
			
		||||
      result()()(1) = even_1 + odd_1;
 | 
			
		||||
      result()()(2) = even_2 + odd_2;
 | 
			
		||||
    }
 | 
			
		||||
    vstream(out._odata[sF],result);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
					     DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
					     SiteSpinor *buf, int LLs, int sU, 
 | 
			
		||||
					     const FermionField &in, FermionField &out,int dag) 
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  Simd even_0; // 12 regs on knc
 | 
			
		||||
  Simd even_1;
 | 
			
		||||
  Simd even_2;
 | 
			
		||||
  Simd odd_0; // 12 regs on knc
 | 
			
		||||
  Simd odd_1;
 | 
			
		||||
  Simd odd_2;
 | 
			
		||||
 | 
			
		||||
  Simd Chi_0;    // two spinor; 6 regs
 | 
			
		||||
  Simd Chi_1;
 | 
			
		||||
  Simd Chi_2;
 | 
			
		||||
  
 | 
			
		||||
  Simd U_00;  // two rows of U matrix
 | 
			
		||||
  Simd U_10;
 | 
			
		||||
  Simd U_20;  
 | 
			
		||||
  Simd U_01;
 | 
			
		||||
  Simd U_11;
 | 
			
		||||
  Simd U_21;  // 2 reg left.
 | 
			
		||||
  Simd U_02;
 | 
			
		||||
  Simd U_12;
 | 
			
		||||
  Simd U_22; 
 | 
			
		||||
 | 
			
		||||
  SiteSpinor result;
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int skew;
 | 
			
		||||
 | 
			
		||||
  for(int s=0;s<LLs;s++){
 | 
			
		||||
    int sF=s+LLs*sU;
 | 
			
		||||
 | 
			
		||||
    even_0 = zero;    even_1 = zero;    even_2 = zero;
 | 
			
		||||
     odd_0 = zero;     odd_1 = zero;     odd_2 = zero;
 | 
			
		||||
 | 
			
		||||
    skew = 0;
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Yp,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Zp,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Tp,0,skew,odd);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Xm,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Zm,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd);  
 | 
			
		||||
    skew = 8;
 | 
			
		||||
    HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG_INT(UUU,Zp,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(UUU,Tp,0,skew,odd);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(UUU,Xm,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd);  
 | 
			
		||||
 | 
			
		||||
    // Assume every site must be connected to at least one interior point. No 1^4 subvols.
 | 
			
		||||
    if ( dag ) {
 | 
			
		||||
      result()()(0) = - even_0 - odd_0;
 | 
			
		||||
      result()()(1) = - even_1 - odd_1;
 | 
			
		||||
      result()()(2) = - even_2 - odd_2;
 | 
			
		||||
    } else { 
 | 
			
		||||
      result()()(0) = even_0 + odd_0;
 | 
			
		||||
      result()()(1) = even_1 + odd_1;
 | 
			
		||||
      result()()(2) = even_2 + odd_2;
 | 
			
		||||
    }
 | 
			
		||||
    vstream(out._odata[sF],result);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, 
 | 
			
		||||
					     DoubledGaugeField &U, DoubledGaugeField &UUU,
 | 
			
		||||
					     SiteSpinor *buf, int LLs, int sU, 
 | 
			
		||||
					     const FermionField &in, FermionField &out,int dag) 
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  Simd even_0; // 12 regs on knc
 | 
			
		||||
  Simd even_1;
 | 
			
		||||
  Simd even_2;
 | 
			
		||||
  Simd odd_0; // 12 regs on knc
 | 
			
		||||
  Simd odd_1;
 | 
			
		||||
  Simd odd_2;
 | 
			
		||||
 | 
			
		||||
  Simd Chi_0;    // two spinor; 6 regs
 | 
			
		||||
  Simd Chi_1;
 | 
			
		||||
  Simd Chi_2;
 | 
			
		||||
  
 | 
			
		||||
  Simd U_00;  // two rows of U matrix
 | 
			
		||||
  Simd U_10;
 | 
			
		||||
  Simd U_20;  
 | 
			
		||||
  Simd U_01;
 | 
			
		||||
  Simd U_11;
 | 
			
		||||
  Simd U_21;  // 2 reg left.
 | 
			
		||||
  Simd U_02;
 | 
			
		||||
  Simd U_12;
 | 
			
		||||
  Simd U_22; 
 | 
			
		||||
 | 
			
		||||
  SiteSpinor result;
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int skew;
 | 
			
		||||
 | 
			
		||||
  for(int s=0;s<LLs;s++){
 | 
			
		||||
    int sF=s+LLs*sU;
 | 
			
		||||
 | 
			
		||||
    even_0 = zero;    even_1 = zero;    even_2 = zero;
 | 
			
		||||
     odd_0 = zero;     odd_1 = zero;     odd_2 = zero;
 | 
			
		||||
    int nmu=0;
 | 
			
		||||
    skew = 0;
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Yp,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Zp,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Tp,0,skew,odd);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Xm,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd);  
 | 
			
		||||
    skew = 8;
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(UUU,Zp,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(UUU,Tp,0,skew,odd);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(UUU,Xm,3,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd);   
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even);  
 | 
			
		||||
    HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd);  
 | 
			
		||||
 | 
			
		||||
    // Add sum of all exterior connected stencil legs
 | 
			
		||||
    if ( nmu ) { 
 | 
			
		||||
      if ( dag ) {
 | 
			
		||||
	result()()(0) = - even_0 - odd_0;
 | 
			
		||||
	result()()(1) = - even_1 - odd_1;
 | 
			
		||||
	result()()(2) = - even_2 - odd_2;
 | 
			
		||||
      } else { 
 | 
			
		||||
	result()()(0) = even_0 + odd_0;
 | 
			
		||||
	result()()(1) = even_1 + odd_1;
 | 
			
		||||
	result()()(2) = even_2 + odd_2;
 | 
			
		||||
      }
 | 
			
		||||
      out._odata[sF] = out._odata[sF] + result;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define DHOP_SITE_HAND_INSTANTIATE(IMPL)				\
 | 
			
		||||
  template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
 | 
			
		||||
						     DoubledGaugeField &U,DoubledGaugeField &UUU, \
 | 
			
		||||
						     SiteSpinor *buf, int LLs, int sU, \
 | 
			
		||||
						     const FermionField &in, FermionField &out, int dag); \
 | 
			
		||||
									\
 | 
			
		||||
  template void StaggeredKernels<IMPL>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, \
 | 
			
		||||
						     DoubledGaugeField &U,DoubledGaugeField &UUU, \
 | 
			
		||||
						     SiteSpinor *buf, int LLs, int sU, \
 | 
			
		||||
						     const FermionField &in, FermionField &out, int dag); \
 | 
			
		||||
									\
 | 
			
		||||
  template void StaggeredKernels<IMPL>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, \
 | 
			
		||||
						     DoubledGaugeField &U,DoubledGaugeField &UUU, \
 | 
			
		||||
						     SiteSpinor *buf, int LLs, int sU, \
 | 
			
		||||
						     const FermionField &in, FermionField &out, int dag); \
 | 
			
		||||
 | 
			
		||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
 | 
			
		||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
 | 
			
		||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
 | 
			
		||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1,243 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
    Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
//#include <Grid/Eigen/Dense>
 | 
			
		||||
#include <Grid/qcd/spin/Dirac.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid
 | 
			
		||||
{
 | 
			
		||||
namespace QCD
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
// *NOT* EO
 | 
			
		||||
template <class Impl>
 | 
			
		||||
RealD WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  FermionField temp(out._grid);
 | 
			
		||||
 | 
			
		||||
  // Wilson term
 | 
			
		||||
  out.checkerboard = in.checkerboard;
 | 
			
		||||
  this->Dhop(in, out, DaggerNo);
 | 
			
		||||
 | 
			
		||||
  // Clover term
 | 
			
		||||
  Mooee(in, temp);
 | 
			
		||||
 | 
			
		||||
  out += temp;
 | 
			
		||||
  return norm2(out);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
RealD WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  FermionField temp(out._grid);
 | 
			
		||||
 | 
			
		||||
  // Wilson term
 | 
			
		||||
  out.checkerboard = in.checkerboard;
 | 
			
		||||
  this->Dhop(in, out, DaggerYes);
 | 
			
		||||
 | 
			
		||||
  // Clover term
 | 
			
		||||
  MooeeDag(in, temp);
 | 
			
		||||
 | 
			
		||||
  out += temp;
 | 
			
		||||
  return norm2(out);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
			
		||||
{
 | 
			
		||||
  WilsonFermion<Impl>::ImportGauge(_Umu);
 | 
			
		||||
  GridBase *grid = _Umu._grid;
 | 
			
		||||
  typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
 | 
			
		||||
 | 
			
		||||
  // Compute the field strength terms mu>nu
 | 
			
		||||
  WilsonLoops<Impl>::FieldStrength(Bx, _Umu, Zdir, Ydir);
 | 
			
		||||
  WilsonLoops<Impl>::FieldStrength(By, _Umu, Zdir, Xdir);
 | 
			
		||||
  WilsonLoops<Impl>::FieldStrength(Bz, _Umu, Ydir, Xdir);
 | 
			
		||||
  WilsonLoops<Impl>::FieldStrength(Ex, _Umu, Tdir, Xdir);
 | 
			
		||||
  WilsonLoops<Impl>::FieldStrength(Ey, _Umu, Tdir, Ydir);
 | 
			
		||||
  WilsonLoops<Impl>::FieldStrength(Ez, _Umu, Tdir, Zdir);
 | 
			
		||||
 | 
			
		||||
  // Compute the Clover Operator acting on Colour and Spin
 | 
			
		||||
  // multiply here by the clover coefficients for the anisotropy
 | 
			
		||||
  CloverTerm  = fillCloverYZ(Bx) * csw_r;
 | 
			
		||||
  CloverTerm += fillCloverXZ(By) * csw_r;
 | 
			
		||||
  CloverTerm += fillCloverXY(Bz) * csw_r;
 | 
			
		||||
  CloverTerm += fillCloverXT(Ex) * csw_t;
 | 
			
		||||
  CloverTerm += fillCloverYT(Ey) * csw_t;
 | 
			
		||||
  CloverTerm += fillCloverZT(Ez) * csw_t;
 | 
			
		||||
  CloverTerm += diag_mass;
 | 
			
		||||
 | 
			
		||||
  int lvol = _Umu._grid->lSites();
 | 
			
		||||
  int DimRep = Impl::Dimension;
 | 
			
		||||
 | 
			
		||||
  Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
 | 
			
		||||
  Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> lcoor;
 | 
			
		||||
  typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero;
 | 
			
		||||
 | 
			
		||||
  for (int site = 0; site < lvol; site++)
 | 
			
		||||
  {
 | 
			
		||||
    grid->LocalIndexToLocalCoor(site, lcoor);
 | 
			
		||||
    EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
 | 
			
		||||
    peekLocalSite(Qx, CloverTerm, lcoor);
 | 
			
		||||
    Qxinv = zero;
 | 
			
		||||
    //if (csw!=0){
 | 
			
		||||
    for (int j = 0; j < Ns; j++)
 | 
			
		||||
      for (int k = 0; k < Ns; k++)
 | 
			
		||||
        for (int a = 0; a < DimRep; a++)
 | 
			
		||||
          for (int b = 0; b < DimRep; b++)
 | 
			
		||||
            EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b);
 | 
			
		||||
    //   if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
 | 
			
		||||
 | 
			
		||||
    EigenInvCloverOp = EigenCloverOp.inverse();
 | 
			
		||||
    //std::cout << EigenInvCloverOp << std::endl;
 | 
			
		||||
    for (int j = 0; j < Ns; j++)
 | 
			
		||||
      for (int k = 0; k < Ns; k++)
 | 
			
		||||
        for (int a = 0; a < DimRep; a++)
 | 
			
		||||
          for (int b = 0; b < DimRep; b++)
 | 
			
		||||
            Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
 | 
			
		||||
    //    if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
 | 
			
		||||
    //  }
 | 
			
		||||
    pokeLocalSite(Qxinv, CloverTermInv, lcoor);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Separate the even and odd parts
 | 
			
		||||
  pickCheckerboard(Even, CloverTermEven, CloverTerm);
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
 | 
			
		||||
 | 
			
		||||
  pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
 | 
			
		||||
 | 
			
		||||
  pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
 | 
			
		||||
 | 
			
		||||
  pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonCloverFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  this->MooeeInternal(in, out, DaggerNo, InverseNo);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonCloverFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  this->MooeeInternal(in, out, DaggerYes, InverseNo);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonCloverFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  this->MooeeInternal(in, out, DaggerNo, InverseYes);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonCloverFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  this->MooeeInternal(in, out, DaggerYes, InverseYes);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
 | 
			
		||||
{
 | 
			
		||||
  out.checkerboard = in.checkerboard;
 | 
			
		||||
  CloverFieldType *Clover;
 | 
			
		||||
  assert(in.checkerboard == Odd || in.checkerboard == Even);
 | 
			
		||||
 | 
			
		||||
  if (dag)
 | 
			
		||||
  {
 | 
			
		||||
    if (in._grid->_isCheckerBoarded)
 | 
			
		||||
    {
 | 
			
		||||
      if (in.checkerboard == Odd)
 | 
			
		||||
      {
 | 
			
		||||
        Clover = (inv) ? &CloverTermInvDagOdd : &CloverTermDagOdd;
 | 
			
		||||
      }
 | 
			
		||||
      else
 | 
			
		||||
      {
 | 
			
		||||
        Clover = (inv) ? &CloverTermInvDagEven : &CloverTermDagEven;
 | 
			
		||||
      }
 | 
			
		||||
      out = *Clover * in;
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      Clover = (inv) ? &CloverTermInv : &CloverTerm;
 | 
			
		||||
      out = adj(*Clover) * in;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    if (in._grid->_isCheckerBoarded)
 | 
			
		||||
    {
 | 
			
		||||
 | 
			
		||||
      if (in.checkerboard == Odd)
 | 
			
		||||
      {
 | 
			
		||||
        //  std::cout << "Calling clover term Odd" << std::endl;
 | 
			
		||||
        Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd;
 | 
			
		||||
      }
 | 
			
		||||
      else
 | 
			
		||||
      {
 | 
			
		||||
        //  std::cout << "Calling clover term Even" << std::endl;
 | 
			
		||||
        Clover = (inv) ? &CloverTermInvEven : &CloverTermEven;
 | 
			
		||||
      }
 | 
			
		||||
      out = *Clover * in;
 | 
			
		||||
      //  std::cout << GridLogMessage << "*Clover.checkerboard "  << (*Clover).checkerboard << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      Clover = (inv) ? &CloverTermInv : &CloverTerm;
 | 
			
		||||
      out = *Clover * in;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
} // MooeeInternal
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Derivative parts
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Derivative parts
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonCloverFermion<Impl>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
			
		||||
{
 | 
			
		||||
  assert(0); // not implemented yet
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
FermOpTemplateInstantiate(WilsonCloverFermion);
 | 
			
		||||
AdjointFermOpTemplateInstantiate(WilsonCloverFermion);
 | 
			
		||||
TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion);
 | 
			
		||||
//GparityFermOpTemplateInstantiate(WilsonCloverFermion);
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
@@ -1,366 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
    Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
    Author: David Preti <>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_QCD_WILSON_CLOVER_FERMION_H
 | 
			
		||||
#define GRID_QCD_WILSON_CLOVER_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid
 | 
			
		||||
{
 | 
			
		||||
namespace QCD
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////
 | 
			
		||||
// Wilson Clover
 | 
			
		||||
//
 | 
			
		||||
// Operator ( with anisotropy coefficients):
 | 
			
		||||
//
 | 
			
		||||
// Q =   1 + (Nd-1)/xi_0 + m
 | 
			
		||||
//     + W_t + (nu/xi_0) * W_s
 | 
			
		||||
//     - 1/2*[ csw_t * sum_s (sigma_ts F_ts) + (csw_s/xi_0) * sum_ss (sigma_ss F_ss)  ]
 | 
			
		||||
//
 | 
			
		||||
// s spatial, t temporal directions.
 | 
			
		||||
// where W_t and W_s are the temporal and spatial components of the
 | 
			
		||||
// Wilson Dirac operator
 | 
			
		||||
//
 | 
			
		||||
// csw_r = csw_t to recover the isotropic version
 | 
			
		||||
//////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class WilsonCloverFermion : public WilsonFermion<Impl>
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  // Types definitions
 | 
			
		||||
  INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
  template <typename vtype>
 | 
			
		||||
  using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
 | 
			
		||||
  typedef iImplClover<Simd> SiteCloverType;
 | 
			
		||||
  typedef Lattice<SiteCloverType> CloverFieldType;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  typedef WilsonFermion<Impl> WilsonBase;
 | 
			
		||||
 | 
			
		||||
  virtual void Instantiatable(void){};
 | 
			
		||||
  // Constructors
 | 
			
		||||
  WilsonCloverFermion(GaugeField &_Umu, GridCartesian &Fgrid,
 | 
			
		||||
                      GridRedBlackCartesian &Hgrid,
 | 
			
		||||
                      const RealD _mass,
 | 
			
		||||
                      const RealD _csw_r = 0.0,
 | 
			
		||||
                      const RealD _csw_t = 0.0,
 | 
			
		||||
                      const WilsonAnisotropyCoefficients &clover_anisotropy = WilsonAnisotropyCoefficients(),
 | 
			
		||||
                      const ImplParams &impl_p = ImplParams()) : WilsonFermion<Impl>(_Umu,
 | 
			
		||||
                                                                                     Fgrid,
 | 
			
		||||
                                                                                     Hgrid,
 | 
			
		||||
                                                                                     _mass, impl_p, clover_anisotropy),
 | 
			
		||||
                                                                 CloverTerm(&Fgrid),
 | 
			
		||||
                                                                 CloverTermInv(&Fgrid),
 | 
			
		||||
                                                                 CloverTermEven(&Hgrid),
 | 
			
		||||
                                                                 CloverTermOdd(&Hgrid),
 | 
			
		||||
                                                                 CloverTermInvEven(&Hgrid),
 | 
			
		||||
                                                                 CloverTermInvOdd(&Hgrid),
 | 
			
		||||
                                                                 CloverTermDagEven(&Hgrid),
 | 
			
		||||
                                                                 CloverTermDagOdd(&Hgrid),
 | 
			
		||||
                                                                 CloverTermInvDagEven(&Hgrid),
 | 
			
		||||
                                                                 CloverTermInvDagOdd(&Hgrid)
 | 
			
		||||
  {
 | 
			
		||||
    assert(Nd == 4); // require 4 dimensions
 | 
			
		||||
 | 
			
		||||
    if (clover_anisotropy.isAnisotropic)
 | 
			
		||||
    {
 | 
			
		||||
      csw_r = _csw_r * 0.5 / clover_anisotropy.xi_0;
 | 
			
		||||
      diag_mass = _mass + 1.0 + (Nd - 1) * (clover_anisotropy.nu / clover_anisotropy.xi_0);
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      csw_r = _csw_r * 0.5;
 | 
			
		||||
      diag_mass = 4.0 + _mass;
 | 
			
		||||
    }
 | 
			
		||||
    csw_t = _csw_t * 0.5;
 | 
			
		||||
 | 
			
		||||
    if (csw_r == 0)
 | 
			
		||||
      std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw_r = 0" << std::endl;
 | 
			
		||||
    if (csw_t == 0)
 | 
			
		||||
      std::cout << GridLogWarning << "Initializing WilsonCloverFermion with csw_t = 0" << std::endl;
 | 
			
		||||
 | 
			
		||||
    ImportGauge(_Umu);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual RealD M(const FermionField &in, FermionField &out);
 | 
			
		||||
  virtual RealD Mdag(const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
  virtual void Mooee(const FermionField &in, FermionField &out);
 | 
			
		||||
  virtual void MooeeDag(const FermionField &in, FermionField &out);
 | 
			
		||||
  virtual void MooeeInv(const FermionField &in, FermionField &out);
 | 
			
		||||
  virtual void MooeeInvDag(const FermionField &in, FermionField &out);
 | 
			
		||||
  virtual void MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv);
 | 
			
		||||
 | 
			
		||||
  //virtual void MDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
 | 
			
		||||
  virtual void MooDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
 | 
			
		||||
  virtual void MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag);
 | 
			
		||||
 | 
			
		||||
  void ImportGauge(const GaugeField &_Umu);
 | 
			
		||||
 | 
			
		||||
  // Derivative parts unpreconditioned pseudofermions
 | 
			
		||||
  void MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
 | 
			
		||||
  {
 | 
			
		||||
    conformable(X._grid, Y._grid);
 | 
			
		||||
    conformable(X._grid, force._grid);
 | 
			
		||||
    GaugeLinkField force_mu(force._grid), lambda(force._grid);
 | 
			
		||||
    GaugeField clover_force(force._grid);
 | 
			
		||||
    PropagatorField Lambda(force._grid);
 | 
			
		||||
 | 
			
		||||
    // Guido: Here we are hitting some performance issues:
 | 
			
		||||
    // need to extract the components of the DoubledGaugeField
 | 
			
		||||
    // for each call
 | 
			
		||||
    // Possible solution
 | 
			
		||||
    // Create a vector object to store them? (cons: wasting space)
 | 
			
		||||
    std::vector<GaugeLinkField> U(Nd, this->Umu._grid);
 | 
			
		||||
 | 
			
		||||
    Impl::extractLinkField(U, this->Umu);
 | 
			
		||||
 | 
			
		||||
    force = zero;
 | 
			
		||||
    // Derivative of the Wilson hopping term
 | 
			
		||||
    this->DhopDeriv(force, X, Y, dag);
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    // Clover term derivative
 | 
			
		||||
    ///////////////////////////////////////////////////////////
 | 
			
		||||
    Impl::outerProductImpl(Lambda, X, Y);
 | 
			
		||||
    //std::cout << "Lambda:" << Lambda << std::endl;
 | 
			
		||||
 | 
			
		||||
    Gamma::Algebra sigma[] = {
 | 
			
		||||
        Gamma::Algebra::SigmaXY,
 | 
			
		||||
        Gamma::Algebra::SigmaXZ,
 | 
			
		||||
        Gamma::Algebra::SigmaXT,
 | 
			
		||||
        Gamma::Algebra::MinusSigmaXY,
 | 
			
		||||
        Gamma::Algebra::SigmaYZ,
 | 
			
		||||
        Gamma::Algebra::SigmaYT,
 | 
			
		||||
        Gamma::Algebra::MinusSigmaXZ,
 | 
			
		||||
        Gamma::Algebra::MinusSigmaYZ,
 | 
			
		||||
        Gamma::Algebra::SigmaZT,
 | 
			
		||||
        Gamma::Algebra::MinusSigmaXT,
 | 
			
		||||
        Gamma::Algebra::MinusSigmaYT,
 | 
			
		||||
        Gamma::Algebra::MinusSigmaZT};
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
      sigma_{\mu \nu}=
 | 
			
		||||
      | 0         sigma[0]  sigma[1]  sigma[2] |
 | 
			
		||||
      | sigma[3]    0       sigma[4]  sigma[5] |
 | 
			
		||||
      | sigma[6]  sigma[7]     0      sigma[8] |
 | 
			
		||||
      | sigma[9]  sigma[10] sigma[11]   0      |
 | 
			
		||||
    */
 | 
			
		||||
 | 
			
		||||
    int count = 0;
 | 
			
		||||
    clover_force = zero;
 | 
			
		||||
    for (int mu = 0; mu < 4; mu++)
 | 
			
		||||
    {
 | 
			
		||||
      force_mu = zero;
 | 
			
		||||
      for (int nu = 0; nu < 4; nu++)
 | 
			
		||||
      {
 | 
			
		||||
        if (mu == nu)
 | 
			
		||||
        continue;
 | 
			
		||||
        
 | 
			
		||||
        RealD factor;
 | 
			
		||||
        if (nu == 4 || mu == 4)
 | 
			
		||||
        {
 | 
			
		||||
          factor = 2.0 * csw_t;
 | 
			
		||||
        }
 | 
			
		||||
        else
 | 
			
		||||
        {
 | 
			
		||||
          factor = 2.0 * csw_r;
 | 
			
		||||
        }
 | 
			
		||||
        PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
 | 
			
		||||
        Impl::TraceSpinImpl(lambda, Slambda);                   // traceSpin ok
 | 
			
		||||
        force_mu -= factor*Cmunu(U, lambda, mu, nu);                   // checked
 | 
			
		||||
        count++;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      pokeLorentz(clover_force, U[mu] * force_mu, mu);
 | 
			
		||||
    }
 | 
			
		||||
    //clover_force *= csw;
 | 
			
		||||
    force += clover_force;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis
 | 
			
		||||
  GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu)
 | 
			
		||||
  {
 | 
			
		||||
    conformable(lambda._grid, U[0]._grid);
 | 
			
		||||
    GaugeLinkField out(lambda._grid), tmp(lambda._grid);
 | 
			
		||||
    // insertion in upper staple
 | 
			
		||||
    // please check redundancy of shift operations
 | 
			
		||||
 | 
			
		||||
    // C1+
 | 
			
		||||
    tmp = lambda * U[nu];
 | 
			
		||||
    out = Impl::ShiftStaple(Impl::CovShiftForward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu);
 | 
			
		||||
 | 
			
		||||
    // C2+
 | 
			
		||||
    tmp = U[mu] * Impl::ShiftStaple(adj(lambda), mu);
 | 
			
		||||
    out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(tmp, mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu);
 | 
			
		||||
 | 
			
		||||
    // C3+
 | 
			
		||||
    tmp = U[nu] * Impl::ShiftStaple(adj(lambda), nu);
 | 
			
		||||
    out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(tmp, nu))), mu);
 | 
			
		||||
 | 
			
		||||
    // C4+
 | 
			
		||||
    out += Impl::ShiftStaple(Impl::CovShiftForward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, Impl::CovShiftIdentityBackward(U[nu], nu))), mu) * lambda;
 | 
			
		||||
 | 
			
		||||
    // insertion in lower staple
 | 
			
		||||
    // C1-
 | 
			
		||||
    out -= Impl::ShiftStaple(lambda, mu) * Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu);
 | 
			
		||||
 | 
			
		||||
    // C2-
 | 
			
		||||
    tmp = adj(lambda) * U[nu];
 | 
			
		||||
    out -= Impl::ShiftStaple(Impl::CovShiftBackward(tmp, nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu);
 | 
			
		||||
 | 
			
		||||
    // C3-
 | 
			
		||||
    tmp = lambda * U[nu];
 | 
			
		||||
    out -= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, tmp)), mu);
 | 
			
		||||
 | 
			
		||||
    // C4-
 | 
			
		||||
    out -= Impl::ShiftStaple(Impl::CovShiftBackward(U[nu], nu, Impl::CovShiftBackward(U[mu], mu, U[nu])), mu) * lambda;
 | 
			
		||||
 | 
			
		||||
    return out;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  // here fixing the 4 dimensions, make it more general?
 | 
			
		||||
 | 
			
		||||
  RealD csw_r;                                               // Clover coefficient - spatial
 | 
			
		||||
  RealD csw_t;                                               // Clover coefficient - temporal
 | 
			
		||||
  RealD diag_mass;                                           // Mass term
 | 
			
		||||
  CloverFieldType CloverTerm, CloverTermInv;                 // Clover term
 | 
			
		||||
  CloverFieldType CloverTermEven, CloverTermOdd;             // Clover term EO
 | 
			
		||||
  CloverFieldType CloverTermInvEven, CloverTermInvOdd;       // Clover term Inv EO
 | 
			
		||||
  CloverFieldType CloverTermDagEven, CloverTermDagOdd;       // Clover term Dag EO
 | 
			
		||||
  CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; // Clover term Inv Dag EO
 | 
			
		||||
 | 
			
		||||
  // eventually these can be compressed into 6x6 blocks instead of the 12x12
 | 
			
		||||
  // using the DeGrand-Rossi basis for the gamma matrices
 | 
			
		||||
  CloverFieldType fillCloverYZ(const GaugeLinkField &F)
 | 
			
		||||
  {
 | 
			
		||||
    CloverFieldType T(F._grid);
 | 
			
		||||
    T = zero;
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int i = 0; i < CloverTerm._grid->oSites(); i++)
 | 
			
		||||
    {
 | 
			
		||||
      T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return T;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  CloverFieldType fillCloverXZ(const GaugeLinkField &F)
 | 
			
		||||
  {
 | 
			
		||||
    CloverFieldType T(F._grid);
 | 
			
		||||
    T = zero;
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int i = 0; i < CloverTerm._grid->oSites(); i++)
 | 
			
		||||
    {
 | 
			
		||||
      T._odata[i]()(0, 1) = -F._odata[i]()();
 | 
			
		||||
      T._odata[i]()(1, 0) = F._odata[i]()();
 | 
			
		||||
      T._odata[i]()(2, 3) = -F._odata[i]()();
 | 
			
		||||
      T._odata[i]()(3, 2) = F._odata[i]()();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return T;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  CloverFieldType fillCloverXY(const GaugeLinkField &F)
 | 
			
		||||
  {
 | 
			
		||||
    CloverFieldType T(F._grid);
 | 
			
		||||
    T = zero;
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int i = 0; i < CloverTerm._grid->oSites(); i++)
 | 
			
		||||
    {
 | 
			
		||||
 | 
			
		||||
      T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(1, 1) = timesI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(3, 3) = timesI(F._odata[i]()());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return T;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  CloverFieldType fillCloverXT(const GaugeLinkField &F)
 | 
			
		||||
  {
 | 
			
		||||
    CloverFieldType T(F._grid);
 | 
			
		||||
    T = zero;
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int i = 0; i < CloverTerm._grid->oSites(); i++)
 | 
			
		||||
    {
 | 
			
		||||
      T._odata[i]()(0, 1) = timesI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(1, 0) = timesI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return T;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  CloverFieldType fillCloverYT(const GaugeLinkField &F)
 | 
			
		||||
  {
 | 
			
		||||
    CloverFieldType T(F._grid);
 | 
			
		||||
    T = zero;
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int i = 0; i < CloverTerm._grid->oSites(); i++)
 | 
			
		||||
    {
 | 
			
		||||
      T._odata[i]()(0, 1) = -(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(1, 0) = (F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(2, 3) = (F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(3, 2) = -(F._odata[i]()());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return T;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  CloverFieldType fillCloverZT(const GaugeLinkField &F)
 | 
			
		||||
  {
 | 
			
		||||
    CloverFieldType T(F._grid);
 | 
			
		||||
    T = zero;
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (int i = 0; i < CloverTerm._grid->oSites(); i++)
 | 
			
		||||
    {
 | 
			
		||||
      T._odata[i]()(0, 0) = timesI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()());
 | 
			
		||||
      T._odata[i]()(3, 3) = timesI(F._odata[i]()());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return T;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif // GRID_QCD_WILSON_CLOVER_FERMION_H
 | 
			
		||||
@@ -1,878 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/WilsonKernelsHand.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
			
		||||
 | 
			
		||||
#define REGISTER
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHIMU_BODY(F)			\
 | 
			
		||||
  Chimu_00=ref(F)(0)(0);			\
 | 
			
		||||
  Chimu_01=ref(F)(0)(1);			\
 | 
			
		||||
  Chimu_02=ref(F)(0)(2);			\
 | 
			
		||||
  Chimu_10=ref(F)(1)(0);			\
 | 
			
		||||
  Chimu_11=ref(F)(1)(1);			\
 | 
			
		||||
  Chimu_12=ref(F)(1)(2);			\
 | 
			
		||||
  Chimu_20=ref(F)(2)(0);			\
 | 
			
		||||
  Chimu_21=ref(F)(2)(1);			\
 | 
			
		||||
  Chimu_22=ref(F)(2)(2);			\
 | 
			
		||||
  Chimu_30=ref(F)(3)(0);			\
 | 
			
		||||
  Chimu_31=ref(F)(3)(1);			\
 | 
			
		||||
  Chimu_32=ref(F)(3)(2)
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHIMU(DIR,F,PERM)						\
 | 
			
		||||
  { const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI_BODY(F)				\
 | 
			
		||||
    Chi_00 = ref(F)(0)(0);\
 | 
			
		||||
    Chi_01 = ref(F)(0)(1);\
 | 
			
		||||
    Chi_02 = ref(F)(0)(2);\
 | 
			
		||||
    Chi_10 = ref(F)(1)(0);\
 | 
			
		||||
    Chi_11 = ref(F)(1)(1);\
 | 
			
		||||
    Chi_12 = ref(F)(1)(2)
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI(DIR,F,PERM)					\
 | 
			
		||||
  {const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//G-parity implementations using in-place intrinsic ops
 | 
			
		||||
 | 
			
		||||
//1l 1h -> 1h 1l
 | 
			
		||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
 | 
			
		||||
//0h,1l -> 1l,0h
 | 
			
		||||
//if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) )
 | 
			
		||||
//Pulled fermion through forwards face, GPBC on upper component
 | 
			
		||||
//Need 0= 0l 1h   1= 1l 0h
 | 
			
		||||
//else if( (distance == -1 && !perm) || (distance == 1 && perm) )
 | 
			
		||||
//Pulled fermion through backwards face, GPBC on lower component
 | 
			
		||||
//Need 0= 1l 0h   1= 0l 1h
 | 
			
		||||
 | 
			
		||||
//1l 1h -> 1h 1l
 | 
			
		||||
//0l 0h , 1h 1l -> 0l 1h 0h,1l
 | 
			
		||||
#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3)			\
 | 
			
		||||
  permute##PERM(tmp1, ref(1)(S)(C));				\
 | 
			
		||||
  exchange##PERM(tmp2,tmp3, ref(0)(S)(C), tmp1);		\
 | 
			
		||||
  INTO = tmp2;
 | 
			
		||||
 | 
			
		||||
//0l 0h -> 0h 0l
 | 
			
		||||
//1l 1h, 0h 0l -> 1l 0h, 1h 0l
 | 
			
		||||
#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3)			\
 | 
			
		||||
  permute##PERM(tmp1, ref(0)(S)(C));				\
 | 
			
		||||
  exchange##PERM(tmp2,tmp3, ref(1)(S)(C), tmp1);		\
 | 
			
		||||
  INTO = tmp2;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI_SETUP(DIR,F)						\
 | 
			
		||||
  g = F;								\
 | 
			
		||||
  direction = st._directions[DIR];				\
 | 
			
		||||
  distance = st._distances[DIR];				\
 | 
			
		||||
  sl = st._grid->_simd_layout[direction];			\
 | 
			
		||||
  inplace_twist = 0;						\
 | 
			
		||||
  if(SE->_around_the_world && this->Params.twists[DIR % 4]){		\
 | 
			
		||||
    if(sl == 1){							\
 | 
			
		||||
      g = (F+1) % 2;							\
 | 
			
		||||
    }else{								\
 | 
			
		||||
      inplace_twist = 1;						\
 | 
			
		||||
    }									\
 | 
			
		||||
  }  
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)			\
 | 
			
		||||
  { const SiteSpinor &ref(in._odata[offset]);				\
 | 
			
		||||
    LOAD_CHI_SETUP(DIR,F);						\
 | 
			
		||||
    if(!inplace_twist){							\
 | 
			
		||||
      LOAD_CHIMU_BODY(g);						\
 | 
			
		||||
    }else{								\
 | 
			
		||||
      if(  ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
 | 
			
		||||
	   ( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
      }else{								\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM,  U_00,U_01,U_10);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM,  U_11,U_20,U_21);		\
 | 
			
		||||
      } \
 | 
			
		||||
    } \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)				\
 | 
			
		||||
  { const SiteHalfSpinor &ref(buf[offset]);				\
 | 
			
		||||
    LOAD_CHI_SETUP(DIR,F);						\
 | 
			
		||||
    if(!inplace_twist){							\
 | 
			
		||||
      LOAD_CHI_BODY(g);							\
 | 
			
		||||
    }else{								\
 | 
			
		||||
      if(  ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
 | 
			
		||||
	   ( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
 | 
			
		||||
	DO_TWIST_0L_1H(Chi_00,0,0,F,PERM,  U_00,U_01,U_10);			\
 | 
			
		||||
	DO_TWIST_0L_1H(Chi_01,0,1,F,PERM,  U_11,U_20,U_21);			\
 | 
			
		||||
	DO_TWIST_0L_1H(Chi_02,0,2,F,PERM,  UChi_00,UChi_01,UChi_02);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chi_10,1,0,F,PERM,  UChi_10,UChi_11,UChi_12);		\
 | 
			
		||||
	DO_TWIST_0L_1H(Chi_11,1,1,F,PERM,  U_00,U_01,U_10);			\
 | 
			
		||||
	DO_TWIST_0L_1H(Chi_12,1,2,F,PERM,  U_11,U_20,U_21);			\
 | 
			
		||||
      }else{								\
 | 
			
		||||
	DO_TWIST_1L_0H(Chi_00,0,0,F,PERM,  U_00,U_01,U_10);			\
 | 
			
		||||
	DO_TWIST_1L_0H(Chi_01,0,1,F,PERM,  U_11,U_20,U_21);			\
 | 
			
		||||
	DO_TWIST_1L_0H(Chi_02,0,2,F,PERM,  UChi_00,UChi_01,UChi_02);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chi_10,1,0,F,PERM,  UChi_10,UChi_11,UChi_12);		\
 | 
			
		||||
	DO_TWIST_1L_0H(Chi_11,1,1,F,PERM,  U_00,U_01,U_10);			\
 | 
			
		||||
	DO_TWIST_1L_0H(Chi_12,1,2,F,PERM,  U_11,U_20,U_21);			\
 | 
			
		||||
      }									\
 | 
			
		||||
    }									\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
 | 
			
		||||
#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
 | 
			
		||||
 | 
			
		||||
// To splat or not to splat depends on the implementation
 | 
			
		||||
#define MULT_2SPIN_BODY \
 | 
			
		||||
  Impl::loadLinkElement(U_00,ref()(0,0));	\
 | 
			
		||||
  Impl::loadLinkElement(U_10,ref()(1,0));	\
 | 
			
		||||
  Impl::loadLinkElement(U_20,ref()(2,0));	\
 | 
			
		||||
  Impl::loadLinkElement(U_01,ref()(0,1));	\
 | 
			
		||||
  Impl::loadLinkElement(U_11,ref()(1,1));	\
 | 
			
		||||
  Impl::loadLinkElement(U_21,ref()(2,1));	\
 | 
			
		||||
  UChi_00 = U_00*Chi_00;			\
 | 
			
		||||
  UChi_10 = U_00*Chi_10;			\
 | 
			
		||||
  UChi_01 = U_10*Chi_00;			\
 | 
			
		||||
  UChi_11 = U_10*Chi_10;			\
 | 
			
		||||
  UChi_02 = U_20*Chi_00;			\
 | 
			
		||||
  UChi_12 = U_20*Chi_10;			\
 | 
			
		||||
  UChi_00+= U_01*Chi_01;			\
 | 
			
		||||
  UChi_10+= U_01*Chi_11;			\
 | 
			
		||||
  UChi_01+= U_11*Chi_01;			\
 | 
			
		||||
  UChi_11+= U_11*Chi_11;			\
 | 
			
		||||
  UChi_02+= U_21*Chi_01;			\
 | 
			
		||||
  UChi_12+= U_21*Chi_11;			\
 | 
			
		||||
  Impl::loadLinkElement(U_00,ref()(0,2));	\
 | 
			
		||||
  Impl::loadLinkElement(U_10,ref()(1,2));	\
 | 
			
		||||
  Impl::loadLinkElement(U_20,ref()(2,2));	\
 | 
			
		||||
  UChi_00+= U_00*Chi_02;			\
 | 
			
		||||
  UChi_10+= U_00*Chi_12;			\
 | 
			
		||||
  UChi_01+= U_10*Chi_02;			\
 | 
			
		||||
  UChi_11+= U_10*Chi_12;			\
 | 
			
		||||
  UChi_02+= U_20*Chi_02;			\
 | 
			
		||||
  UChi_12+= U_20*Chi_12
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define MULT_2SPIN(A,F)					\
 | 
			
		||||
  {auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
 | 
			
		||||
 | 
			
		||||
#define MULT_2SPIN_GPARITY(A,F)				\
 | 
			
		||||
  {auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define PERMUTE_DIR(dir)			\
 | 
			
		||||
      permute##dir(Chi_00,Chi_00);\
 | 
			
		||||
      permute##dir(Chi_01,Chi_01);\
 | 
			
		||||
      permute##dir(Chi_02,Chi_02);\
 | 
			
		||||
      permute##dir(Chi_10,Chi_10);\
 | 
			
		||||
      permute##dir(Chi_11,Chi_11);\
 | 
			
		||||
      permute##dir(Chi_12,Chi_12);
 | 
			
		||||
 | 
			
		||||
//      hspin(0)=fspin(0)+timesI(fspin(3));
 | 
			
		||||
//      hspin(1)=fspin(1)+timesI(fspin(2));
 | 
			
		||||
#define XP_PROJ \
 | 
			
		||||
    Chi_00 = Chimu_00+timesI(Chimu_30);\
 | 
			
		||||
    Chi_01 = Chimu_01+timesI(Chimu_31);\
 | 
			
		||||
    Chi_02 = Chimu_02+timesI(Chimu_32);\
 | 
			
		||||
    Chi_10 = Chimu_10+timesI(Chimu_20);\
 | 
			
		||||
    Chi_11 = Chimu_11+timesI(Chimu_21);\
 | 
			
		||||
    Chi_12 = Chimu_12+timesI(Chimu_22);
 | 
			
		||||
 | 
			
		||||
#define YP_PROJ \
 | 
			
		||||
    Chi_00 = Chimu_00-Chimu_30;\
 | 
			
		||||
    Chi_01 = Chimu_01-Chimu_31;\
 | 
			
		||||
    Chi_02 = Chimu_02-Chimu_32;\
 | 
			
		||||
    Chi_10 = Chimu_10+Chimu_20;\
 | 
			
		||||
    Chi_11 = Chimu_11+Chimu_21;\
 | 
			
		||||
    Chi_12 = Chimu_12+Chimu_22;
 | 
			
		||||
 | 
			
		||||
#define ZP_PROJ \
 | 
			
		||||
  Chi_00 = Chimu_00+timesI(Chimu_20);		\
 | 
			
		||||
  Chi_01 = Chimu_01+timesI(Chimu_21);		\
 | 
			
		||||
  Chi_02 = Chimu_02+timesI(Chimu_22);		\
 | 
			
		||||
  Chi_10 = Chimu_10-timesI(Chimu_30);		\
 | 
			
		||||
  Chi_11 = Chimu_11-timesI(Chimu_31);		\
 | 
			
		||||
  Chi_12 = Chimu_12-timesI(Chimu_32);
 | 
			
		||||
 | 
			
		||||
#define TP_PROJ \
 | 
			
		||||
  Chi_00 = Chimu_00+Chimu_20;		\
 | 
			
		||||
  Chi_01 = Chimu_01+Chimu_21;		\
 | 
			
		||||
  Chi_02 = Chimu_02+Chimu_22;		\
 | 
			
		||||
  Chi_10 = Chimu_10+Chimu_30;		\
 | 
			
		||||
  Chi_11 = Chimu_11+Chimu_31;		\
 | 
			
		||||
  Chi_12 = Chimu_12+Chimu_32;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//      hspin(0)=fspin(0)-timesI(fspin(3));
 | 
			
		||||
//      hspin(1)=fspin(1)-timesI(fspin(2));
 | 
			
		||||
#define XM_PROJ \
 | 
			
		||||
    Chi_00 = Chimu_00-timesI(Chimu_30);\
 | 
			
		||||
    Chi_01 = Chimu_01-timesI(Chimu_31);\
 | 
			
		||||
    Chi_02 = Chimu_02-timesI(Chimu_32);\
 | 
			
		||||
    Chi_10 = Chimu_10-timesI(Chimu_20);\
 | 
			
		||||
    Chi_11 = Chimu_11-timesI(Chimu_21);\
 | 
			
		||||
    Chi_12 = Chimu_12-timesI(Chimu_22);
 | 
			
		||||
 | 
			
		||||
#define YM_PROJ \
 | 
			
		||||
    Chi_00 = Chimu_00+Chimu_30;\
 | 
			
		||||
    Chi_01 = Chimu_01+Chimu_31;\
 | 
			
		||||
    Chi_02 = Chimu_02+Chimu_32;\
 | 
			
		||||
    Chi_10 = Chimu_10-Chimu_20;\
 | 
			
		||||
    Chi_11 = Chimu_11-Chimu_21;\
 | 
			
		||||
    Chi_12 = Chimu_12-Chimu_22;
 | 
			
		||||
 | 
			
		||||
#define ZM_PROJ \
 | 
			
		||||
  Chi_00 = Chimu_00-timesI(Chimu_20);		\
 | 
			
		||||
  Chi_01 = Chimu_01-timesI(Chimu_21);		\
 | 
			
		||||
  Chi_02 = Chimu_02-timesI(Chimu_22);		\
 | 
			
		||||
  Chi_10 = Chimu_10+timesI(Chimu_30);		\
 | 
			
		||||
  Chi_11 = Chimu_11+timesI(Chimu_31);		\
 | 
			
		||||
  Chi_12 = Chimu_12+timesI(Chimu_32);
 | 
			
		||||
 | 
			
		||||
#define TM_PROJ \
 | 
			
		||||
  Chi_00 = Chimu_00-Chimu_20;		\
 | 
			
		||||
  Chi_01 = Chimu_01-Chimu_21;		\
 | 
			
		||||
  Chi_02 = Chimu_02-Chimu_22;		\
 | 
			
		||||
  Chi_10 = Chimu_10-Chimu_30;		\
 | 
			
		||||
  Chi_11 = Chimu_11-Chimu_31;		\
 | 
			
		||||
  Chi_12 = Chimu_12-Chimu_32;
 | 
			
		||||
 | 
			
		||||
//      fspin(0)=hspin(0);
 | 
			
		||||
//      fspin(1)=hspin(1);
 | 
			
		||||
//      fspin(2)=timesMinusI(hspin(1));
 | 
			
		||||
//      fspin(3)=timesMinusI(hspin(0));
 | 
			
		||||
#define XP_RECON\
 | 
			
		||||
  result_00 = UChi_00;\
 | 
			
		||||
  result_01 = UChi_01;\
 | 
			
		||||
  result_02 = UChi_02;\
 | 
			
		||||
  result_10 = UChi_10;\
 | 
			
		||||
  result_11 = UChi_11;\
 | 
			
		||||
  result_12 = UChi_12;\
 | 
			
		||||
  result_20 = timesMinusI(UChi_10);\
 | 
			
		||||
  result_21 = timesMinusI(UChi_11);\
 | 
			
		||||
  result_22 = timesMinusI(UChi_12);\
 | 
			
		||||
  result_30 = timesMinusI(UChi_00);\
 | 
			
		||||
  result_31 = timesMinusI(UChi_01);\
 | 
			
		||||
  result_32 = timesMinusI(UChi_02);
 | 
			
		||||
 | 
			
		||||
#define XP_RECON_ACCUM\
 | 
			
		||||
  result_00+=UChi_00;\
 | 
			
		||||
  result_01+=UChi_01;\
 | 
			
		||||
  result_02+=UChi_02;\
 | 
			
		||||
  result_10+=UChi_10;\
 | 
			
		||||
  result_11+=UChi_11;\
 | 
			
		||||
  result_12+=UChi_12;\
 | 
			
		||||
  result_20-=timesI(UChi_10);\
 | 
			
		||||
  result_21-=timesI(UChi_11);\
 | 
			
		||||
  result_22-=timesI(UChi_12);\
 | 
			
		||||
  result_30-=timesI(UChi_00);\
 | 
			
		||||
  result_31-=timesI(UChi_01);\
 | 
			
		||||
  result_32-=timesI(UChi_02);
 | 
			
		||||
 | 
			
		||||
#define XM_RECON\
 | 
			
		||||
  result_00 = UChi_00;\
 | 
			
		||||
  result_01 = UChi_01;\
 | 
			
		||||
  result_02 = UChi_02;\
 | 
			
		||||
  result_10 = UChi_10;\
 | 
			
		||||
  result_11 = UChi_11;\
 | 
			
		||||
  result_12 = UChi_12;\
 | 
			
		||||
  result_20 = timesI(UChi_10);\
 | 
			
		||||
  result_21 = timesI(UChi_11);\
 | 
			
		||||
  result_22 = timesI(UChi_12);\
 | 
			
		||||
  result_30 = timesI(UChi_00);\
 | 
			
		||||
  result_31 = timesI(UChi_01);\
 | 
			
		||||
  result_32 = timesI(UChi_02);
 | 
			
		||||
 | 
			
		||||
#define XM_RECON_ACCUM\
 | 
			
		||||
  result_00+= UChi_00;\
 | 
			
		||||
  result_01+= UChi_01;\
 | 
			
		||||
  result_02+= UChi_02;\
 | 
			
		||||
  result_10+= UChi_10;\
 | 
			
		||||
  result_11+= UChi_11;\
 | 
			
		||||
  result_12+= UChi_12;\
 | 
			
		||||
  result_20+= timesI(UChi_10);\
 | 
			
		||||
  result_21+= timesI(UChi_11);\
 | 
			
		||||
  result_22+= timesI(UChi_12);\
 | 
			
		||||
  result_30+= timesI(UChi_00);\
 | 
			
		||||
  result_31+= timesI(UChi_01);\
 | 
			
		||||
  result_32+= timesI(UChi_02);
 | 
			
		||||
 | 
			
		||||
#define YP_RECON_ACCUM\
 | 
			
		||||
  result_00+= UChi_00;\
 | 
			
		||||
  result_01+= UChi_01;\
 | 
			
		||||
  result_02+= UChi_02;\
 | 
			
		||||
  result_10+= UChi_10;\
 | 
			
		||||
  result_11+= UChi_11;\
 | 
			
		||||
  result_12+= UChi_12;\
 | 
			
		||||
  result_20+= UChi_10;\
 | 
			
		||||
  result_21+= UChi_11;\
 | 
			
		||||
  result_22+= UChi_12;\
 | 
			
		||||
  result_30-= UChi_00;\
 | 
			
		||||
  result_31-= UChi_01;\
 | 
			
		||||
  result_32-= UChi_02;
 | 
			
		||||
 | 
			
		||||
#define YM_RECON_ACCUM\
 | 
			
		||||
  result_00+= UChi_00;\
 | 
			
		||||
  result_01+= UChi_01;\
 | 
			
		||||
  result_02+= UChi_02;\
 | 
			
		||||
  result_10+= UChi_10;\
 | 
			
		||||
  result_11+= UChi_11;\
 | 
			
		||||
  result_12+= UChi_12;\
 | 
			
		||||
  result_20-= UChi_10;\
 | 
			
		||||
  result_21-= UChi_11;\
 | 
			
		||||
  result_22-= UChi_12;\
 | 
			
		||||
  result_30+= UChi_00;\
 | 
			
		||||
  result_31+= UChi_01;\
 | 
			
		||||
  result_32+= UChi_02;
 | 
			
		||||
 | 
			
		||||
#define ZP_RECON_ACCUM\
 | 
			
		||||
  result_00+= UChi_00;\
 | 
			
		||||
  result_01+= UChi_01;\
 | 
			
		||||
  result_02+= UChi_02;\
 | 
			
		||||
  result_10+= UChi_10;\
 | 
			
		||||
  result_11+= UChi_11;\
 | 
			
		||||
  result_12+= UChi_12;\
 | 
			
		||||
  result_20-= timesI(UChi_00);			\
 | 
			
		||||
  result_21-= timesI(UChi_01);			\
 | 
			
		||||
  result_22-= timesI(UChi_02);			\
 | 
			
		||||
  result_30+= timesI(UChi_10);			\
 | 
			
		||||
  result_31+= timesI(UChi_11);			\
 | 
			
		||||
  result_32+= timesI(UChi_12);
 | 
			
		||||
 | 
			
		||||
#define ZM_RECON_ACCUM\
 | 
			
		||||
  result_00+= UChi_00;\
 | 
			
		||||
  result_01+= UChi_01;\
 | 
			
		||||
  result_02+= UChi_02;\
 | 
			
		||||
  result_10+= UChi_10;\
 | 
			
		||||
  result_11+= UChi_11;\
 | 
			
		||||
  result_12+= UChi_12;\
 | 
			
		||||
  result_20+= timesI(UChi_00);			\
 | 
			
		||||
  result_21+= timesI(UChi_01);			\
 | 
			
		||||
  result_22+= timesI(UChi_02);			\
 | 
			
		||||
  result_30-= timesI(UChi_10);			\
 | 
			
		||||
  result_31-= timesI(UChi_11);			\
 | 
			
		||||
  result_32-= timesI(UChi_12);
 | 
			
		||||
 | 
			
		||||
#define TP_RECON_ACCUM\
 | 
			
		||||
  result_00+= UChi_00;\
 | 
			
		||||
  result_01+= UChi_01;\
 | 
			
		||||
  result_02+= UChi_02;\
 | 
			
		||||
  result_10+= UChi_10;\
 | 
			
		||||
  result_11+= UChi_11;\
 | 
			
		||||
  result_12+= UChi_12;\
 | 
			
		||||
  result_20+= UChi_00;			\
 | 
			
		||||
  result_21+= UChi_01;			\
 | 
			
		||||
  result_22+= UChi_02;			\
 | 
			
		||||
  result_30+= UChi_10;			\
 | 
			
		||||
  result_31+= UChi_11;			\
 | 
			
		||||
  result_32+= UChi_12;
 | 
			
		||||
 | 
			
		||||
#define TM_RECON_ACCUM\
 | 
			
		||||
  result_00+= UChi_00;\
 | 
			
		||||
  result_01+= UChi_01;\
 | 
			
		||||
  result_02+= UChi_02;\
 | 
			
		||||
  result_10+= UChi_10;\
 | 
			
		||||
  result_11+= UChi_11;\
 | 
			
		||||
  result_12+= UChi_12;\
 | 
			
		||||
  result_20-= UChi_00;	\
 | 
			
		||||
  result_21-= UChi_01;	\
 | 
			
		||||
  result_22-= UChi_02;	\
 | 
			
		||||
  result_30-= UChi_10;	\
 | 
			
		||||
  result_31-= UChi_11;	\
 | 
			
		||||
  result_32-= UChi_12;
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
 | 
			
		||||
  SE=st.GetEntry(ptype,DIR,ss);			\
 | 
			
		||||
  offset = SE->_offset;				\
 | 
			
		||||
  local  = SE->_is_local;			\
 | 
			
		||||
  perm   = SE->_permute;			\
 | 
			
		||||
  if ( local ) {				\
 | 
			
		||||
    LOAD_CHIMU_IMPL(DIR,F,PERM);			\
 | 
			
		||||
    PROJ;					\
 | 
			
		||||
    if ( perm) {				\
 | 
			
		||||
      PERMUTE_DIR(PERM);			\
 | 
			
		||||
    }						\
 | 
			
		||||
  } else {					\
 | 
			
		||||
    LOAD_CHI_IMPL(DIR,F,PERM);			\
 | 
			
		||||
  }						\
 | 
			
		||||
  MULT_2SPIN_IMPL(DIR,F);			\
 | 
			
		||||
  RECON;					
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL)	\
 | 
			
		||||
  SE=st.GetEntry(ptype,DIR,ss);			\
 | 
			
		||||
  offset = SE->_offset;				\
 | 
			
		||||
  local  = SE->_is_local;			\
 | 
			
		||||
  perm   = SE->_permute;			\
 | 
			
		||||
  if ( local ) {				\
 | 
			
		||||
    LOAD_CHIMU_IMPL(DIR,F,PERM);			\
 | 
			
		||||
    PROJ;					\
 | 
			
		||||
    if ( perm) {				\
 | 
			
		||||
      PERMUTE_DIR(PERM);			\
 | 
			
		||||
    }						\
 | 
			
		||||
  } else if ( st.same_node[DIR] ) {		\
 | 
			
		||||
    LOAD_CHI_IMPL(DIR,F,PERM);			\
 | 
			
		||||
  }						\
 | 
			
		||||
  if (local || st.same_node[DIR] ) {		\
 | 
			
		||||
    MULT_2SPIN_IMPL(DIR,F);			\
 | 
			
		||||
    RECON;					\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL)	\
 | 
			
		||||
  SE=st.GetEntry(ptype,DIR,ss);			\
 | 
			
		||||
  offset = SE->_offset;				\
 | 
			
		||||
  local  = SE->_is_local;			\
 | 
			
		||||
  perm   = SE->_permute;			\
 | 
			
		||||
  if((!SE->_is_local)&&(!st.same_node[DIR]) ) {	\
 | 
			
		||||
    LOAD_CHI_IMPL(DIR,F,PERM);			\
 | 
			
		||||
    MULT_2SPIN_IMPL(DIR,F);			\
 | 
			
		||||
    RECON;					\
 | 
			
		||||
    nmu++;					\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define HAND_RESULT(ss,F)			\
 | 
			
		||||
  {						\
 | 
			
		||||
    SiteSpinor & ref (out._odata[ss]);		\
 | 
			
		||||
    vstream(ref(F)(0)(0),result_00);		\
 | 
			
		||||
    vstream(ref(F)(0)(1),result_01);		\
 | 
			
		||||
    vstream(ref(F)(0)(2),result_02);		\
 | 
			
		||||
    vstream(ref(F)(1)(0),result_10);		\
 | 
			
		||||
    vstream(ref(F)(1)(1),result_11);		\
 | 
			
		||||
    vstream(ref(F)(1)(2),result_12);		\
 | 
			
		||||
    vstream(ref(F)(2)(0),result_20);		\
 | 
			
		||||
    vstream(ref(F)(2)(1),result_21);		\
 | 
			
		||||
    vstream(ref(F)(2)(2),result_22);		\
 | 
			
		||||
    vstream(ref(F)(3)(0),result_30);		\
 | 
			
		||||
    vstream(ref(F)(3)(1),result_31);		\
 | 
			
		||||
    vstream(ref(F)(3)(2),result_32);		\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define HAND_RESULT_EXT(ss,F)			\
 | 
			
		||||
  if (nmu){					\
 | 
			
		||||
    SiteSpinor & ref (out._odata[ss]);		\
 | 
			
		||||
    ref(F)(0)(0)+=result_00;		\
 | 
			
		||||
    ref(F)(0)(1)+=result_01;		\
 | 
			
		||||
    ref(F)(0)(2)+=result_02;		\
 | 
			
		||||
    ref(F)(1)(0)+=result_10;		\
 | 
			
		||||
    ref(F)(1)(1)+=result_11;		\
 | 
			
		||||
    ref(F)(1)(2)+=result_12;		\
 | 
			
		||||
    ref(F)(2)(0)+=result_20;		\
 | 
			
		||||
    ref(F)(2)(1)+=result_21;		\
 | 
			
		||||
    ref(F)(2)(2)+=result_22;		\
 | 
			
		||||
    ref(F)(3)(0)+=result_30;		\
 | 
			
		||||
    ref(F)(3)(1)+=result_31;		\
 | 
			
		||||
    ref(F)(3)(2)+=result_32;		\
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define HAND_DECLARATIONS(a)			\
 | 
			
		||||
  Simd result_00;				\
 | 
			
		||||
  Simd result_01;				\
 | 
			
		||||
  Simd result_02;				\
 | 
			
		||||
  Simd result_10;				\
 | 
			
		||||
  Simd result_11;				\
 | 
			
		||||
  Simd result_12;				\
 | 
			
		||||
  Simd result_20;				\
 | 
			
		||||
  Simd result_21;				\
 | 
			
		||||
  Simd result_22;				\
 | 
			
		||||
  Simd result_30;				\
 | 
			
		||||
  Simd result_31;				\
 | 
			
		||||
  Simd result_32;				\
 | 
			
		||||
  Simd Chi_00;					\
 | 
			
		||||
  Simd Chi_01;					\
 | 
			
		||||
  Simd Chi_02;					\
 | 
			
		||||
  Simd Chi_10;					\
 | 
			
		||||
  Simd Chi_11;					\
 | 
			
		||||
  Simd Chi_12;					\
 | 
			
		||||
  Simd UChi_00;					\
 | 
			
		||||
  Simd UChi_01;					\
 | 
			
		||||
  Simd UChi_02;					\
 | 
			
		||||
  Simd UChi_10;					\
 | 
			
		||||
  Simd UChi_11;					\
 | 
			
		||||
  Simd UChi_12;					\
 | 
			
		||||
  Simd U_00;					\
 | 
			
		||||
  Simd U_10;					\
 | 
			
		||||
  Simd U_20;					\
 | 
			
		||||
  Simd U_01;					\
 | 
			
		||||
  Simd U_11;					\
 | 
			
		||||
  Simd U_21;
 | 
			
		||||
 | 
			
		||||
#define ZERO_RESULT				\
 | 
			
		||||
  result_00=zero;				\
 | 
			
		||||
  result_01=zero;				\
 | 
			
		||||
  result_02=zero;				\
 | 
			
		||||
  result_10=zero;				\
 | 
			
		||||
  result_11=zero;				\
 | 
			
		||||
  result_12=zero;				\
 | 
			
		||||
  result_20=zero;				\
 | 
			
		||||
  result_21=zero;				\
 | 
			
		||||
  result_22=zero;				\
 | 
			
		||||
  result_30=zero;				\
 | 
			
		||||
  result_31=zero;				\
 | 
			
		||||
  result_32=zero;			
 | 
			
		||||
 | 
			
		||||
#define Chimu_00 Chi_00
 | 
			
		||||
#define Chimu_01 Chi_01
 | 
			
		||||
#define Chimu_02 Chi_02
 | 
			
		||||
#define Chimu_10 Chi_10
 | 
			
		||||
#define Chimu_11 Chi_11
 | 
			
		||||
#define Chimu_12 Chi_12
 | 
			
		||||
#define Chimu_20 UChi_00
 | 
			
		||||
#define Chimu_21 UChi_01
 | 
			
		||||
#define Chimu_22 UChi_02
 | 
			
		||||
#define Chimu_30 UChi_10
 | 
			
		||||
#define Chimu_31 UChi_11
 | 
			
		||||
#define Chimu_32 UChi_12
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
template<class Impl> void 
 | 
			
		||||
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf,
 | 
			
		||||
					  int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  HAND_DECLARATIONS(ignore);
 | 
			
		||||
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
 | 
			
		||||
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
 | 
			
		||||
  HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);	\
 | 
			
		||||
  HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_RESULT(ss,F)
 | 
			
		||||
 | 
			
		||||
  HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
						  int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  HAND_DECLARATIONS(ignore);
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
 | 
			
		||||
  HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_RESULT(ss,F)
 | 
			
		||||
 | 
			
		||||
  HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl> void 
 | 
			
		||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf,
 | 
			
		||||
					  int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  HAND_DECLARATIONS(ignore);
 | 
			
		||||
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
 | 
			
		||||
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
 | 
			
		||||
  ZERO_RESULT; \
 | 
			
		||||
  HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_RESULT(ss,F)
 | 
			
		||||
 | 
			
		||||
  HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
						  int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  HAND_DECLARATIONS(ignore);
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL)				\
 | 
			
		||||
  ZERO_RESULT;							\
 | 
			
		||||
  HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);		\
 | 
			
		||||
  HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);		\
 | 
			
		||||
  HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);		\
 | 
			
		||||
  HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);		\
 | 
			
		||||
  HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);		\
 | 
			
		||||
  HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);		\
 | 
			
		||||
  HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);		\
 | 
			
		||||
  HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL);		\
 | 
			
		||||
  HAND_RESULT(ss,F)
 | 
			
		||||
  
 | 
			
		||||
  HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl> void 
 | 
			
		||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf,
 | 
			
		||||
					  int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  HAND_DECLARATIONS(ignore);
 | 
			
		||||
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int nmu=0;
 | 
			
		||||
 | 
			
		||||
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
 | 
			
		||||
  ZERO_RESULT; \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_RESULT_EXT(ss,F)
 | 
			
		||||
 | 
			
		||||
  HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
			
		||||
						  int ss,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename Simd::scalar_type S;
 | 
			
		||||
  typedef typename Simd::vector_type V;
 | 
			
		||||
 | 
			
		||||
  HAND_DECLARATIONS(ignore);
 | 
			
		||||
 | 
			
		||||
  StencilEntry *SE;
 | 
			
		||||
  int offset,local,perm, ptype;
 | 
			
		||||
  int nmu=0;
 | 
			
		||||
 | 
			
		||||
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
 | 
			
		||||
  ZERO_RESULT; \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
 | 
			
		||||
  HAND_RESULT_EXT(ss,F)
 | 
			
		||||
 | 
			
		||||
  HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define HAND_SPECIALISE_GPARITY(IMPL)					\
 | 
			
		||||
  template<> void							\
 | 
			
		||||
  WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf, \
 | 
			
		||||
				    int ss,int sU,const FermionField &in, FermionField &out) \
 | 
			
		||||
  {									\
 | 
			
		||||
    typedef IMPL Impl;							\
 | 
			
		||||
    typedef typename Simd::scalar_type S;				\
 | 
			
		||||
    typedef typename Simd::vector_type V;				\
 | 
			
		||||
									\
 | 
			
		||||
    HAND_DECLARATIONS(ignore);						\
 | 
			
		||||
									\
 | 
			
		||||
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
 | 
			
		||||
    StencilEntry *SE;							\
 | 
			
		||||
    HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
    HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
  }									\
 | 
			
		||||
									\
 | 
			
		||||
  template<>								\
 | 
			
		||||
  void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
 | 
			
		||||
					    int ss,int sU,const FermionField &in, FermionField &out) \
 | 
			
		||||
  {									\
 | 
			
		||||
    typedef IMPL Impl;							\
 | 
			
		||||
    typedef typename Simd::scalar_type S;				\
 | 
			
		||||
    typedef typename Simd::vector_type V;				\
 | 
			
		||||
									\
 | 
			
		||||
    HAND_DECLARATIONS(ignore);						\
 | 
			
		||||
									\
 | 
			
		||||
    StencilEntry *SE;							\
 | 
			
		||||
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist;					\
 | 
			
		||||
    HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
    HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
  }									\
 | 
			
		||||
									\
 | 
			
		||||
  template<> void							\
 | 
			
		||||
  WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf, \
 | 
			
		||||
						     int ss,int sU,const FermionField &in, FermionField &out) \
 | 
			
		||||
  {									\
 | 
			
		||||
    typedef IMPL Impl;							\
 | 
			
		||||
    typedef typename Simd::scalar_type S;				\
 | 
			
		||||
    typedef typename Simd::vector_type V;				\
 | 
			
		||||
									\
 | 
			
		||||
    HAND_DECLARATIONS(ignore);						\
 | 
			
		||||
									\
 | 
			
		||||
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist;					\
 | 
			
		||||
    StencilEntry *SE;							\
 | 
			
		||||
    HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
    HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
  }									\
 | 
			
		||||
									\
 | 
			
		||||
  template<>								\
 | 
			
		||||
  void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
 | 
			
		||||
							     int ss,int sU,const FermionField &in, FermionField &out) \
 | 
			
		||||
  {									\
 | 
			
		||||
    typedef IMPL Impl;							\
 | 
			
		||||
    typedef typename Simd::scalar_type S;				\
 | 
			
		||||
    typedef typename Simd::vector_type V;				\
 | 
			
		||||
									\
 | 
			
		||||
    HAND_DECLARATIONS(ignore);						\
 | 
			
		||||
									\
 | 
			
		||||
    StencilEntry *SE;							\
 | 
			
		||||
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
 | 
			
		||||
    HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
    HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
  }									\
 | 
			
		||||
									\
 | 
			
		||||
  template<> void							\
 | 
			
		||||
  WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor  *buf, \
 | 
			
		||||
						     int ss,int sU,const FermionField &in, FermionField &out) \
 | 
			
		||||
  {									\
 | 
			
		||||
    typedef IMPL Impl;							\
 | 
			
		||||
    typedef typename Simd::scalar_type S;				\
 | 
			
		||||
    typedef typename Simd::vector_type V;				\
 | 
			
		||||
									\
 | 
			
		||||
    HAND_DECLARATIONS(ignore);						\
 | 
			
		||||
									\
 | 
			
		||||
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
 | 
			
		||||
    StencilEntry *SE;							\
 | 
			
		||||
    int nmu=0;								\
 | 
			
		||||
    HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
    nmu = 0;								\
 | 
			
		||||
    HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
  }									\
 | 
			
		||||
  template<>								\
 | 
			
		||||
  void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
 | 
			
		||||
							     int ss,int sU,const FermionField &in, FermionField &out) \
 | 
			
		||||
  {									\
 | 
			
		||||
    typedef IMPL Impl;							\
 | 
			
		||||
    typedef typename Simd::scalar_type S;				\
 | 
			
		||||
    typedef typename Simd::vector_type V;				\
 | 
			
		||||
									\
 | 
			
		||||
    HAND_DECLARATIONS(ignore);						\
 | 
			
		||||
									\
 | 
			
		||||
    StencilEntry *SE;							\
 | 
			
		||||
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
 | 
			
		||||
    int nmu=0;								\
 | 
			
		||||
    HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
    nmu = 0;								\
 | 
			
		||||
    HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
 | 
			
		||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
 | 
			
		||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
 | 
			
		||||
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
////////////// Wilson ; uses this implementation /////////////////////
 | 
			
		||||
 | 
			
		||||
#define INSTANTIATE_THEM(A) \
 | 
			
		||||
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
 | 
			
		||||
					     int ss,int sU,const FermionField &in, FermionField &out); \
 | 
			
		||||
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
 | 
			
		||||
						int ss,int sU,const FermionField &in, FermionField &out);\
 | 
			
		||||
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
 | 
			
		||||
						int ss,int sU,const FermionField &in, FermionField &out); \
 | 
			
		||||
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
 | 
			
		||||
						   int ss,int sU,const FermionField &in, FermionField &out); \
 | 
			
		||||
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
 | 
			
		||||
						int ss,int sU,const FermionField &in, FermionField &out); \
 | 
			
		||||
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
 | 
			
		||||
						   int ss,int sU,const FermionField &in, FermionField &out); 
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_THEM(GparityWilsonImplF);
 | 
			
		||||
INSTANTIATE_THEM(GparityWilsonImplD);
 | 
			
		||||
INSTANTIATE_THEM(GparityWilsonImplFH);
 | 
			
		||||
INSTANTIATE_THEM(GparityWilsonImplDF);
 | 
			
		||||
}}
 | 
			
		||||
@@ -1,153 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/gauge/GaugeImpl.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_GAUGE_IMPL_TYPES_H
 | 
			
		||||
#define GRID_GAUGE_IMPL_TYPES_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Implementation dependent gauge types
 | 
			
		||||
////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
#define INHERIT_GIMPL_TYPES(GImpl)                  \
 | 
			
		||||
  typedef typename GImpl::Simd Simd;                \
 | 
			
		||||
  typedef typename GImpl::LinkField GaugeLinkField; \
 | 
			
		||||
  typedef typename GImpl::Field GaugeField;         \
 | 
			
		||||
  typedef typename GImpl::ComplexField ComplexField;\
 | 
			
		||||
  typedef typename GImpl::SiteField SiteGaugeField; \
 | 
			
		||||
  typedef typename GImpl::SiteComplex SiteComplex;  \
 | 
			
		||||
  typedef typename GImpl::SiteLink SiteGaugeLink;
 | 
			
		||||
 | 
			
		||||
#define INHERIT_FIELD_TYPES(Impl)		    \
 | 
			
		||||
  typedef typename Impl::Simd Simd;		    \
 | 
			
		||||
  typedef typename Impl::ComplexField ComplexField; \
 | 
			
		||||
  typedef typename Impl::SiteField SiteField;	    \
 | 
			
		||||
  typedef typename Impl::Field Field;
 | 
			
		||||
 | 
			
		||||
// hardcodes the exponential approximation in the template
 | 
			
		||||
template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes {
 | 
			
		||||
public:
 | 
			
		||||
  typedef S Simd;
 | 
			
		||||
 | 
			
		||||
  template <typename vtype> using iImplScalar     = iScalar<iScalar<iScalar<vtype> > >;
 | 
			
		||||
  template <typename vtype> using iImplGaugeLink  = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
 | 
			
		||||
  template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
 | 
			
		||||
 | 
			
		||||
  typedef iImplScalar<Simd>     SiteComplex;
 | 
			
		||||
  typedef iImplGaugeLink<Simd>  SiteLink;
 | 
			
		||||
  typedef iImplGaugeField<Simd> SiteField;
 | 
			
		||||
 | 
			
		||||
  typedef Lattice<SiteComplex> ComplexField;
 | 
			
		||||
  typedef Lattice<SiteLink>    LinkField; 
 | 
			
		||||
  typedef Lattice<SiteField>   Field;
 | 
			
		||||
 | 
			
		||||
  // Guido: we can probably separate the types from the HMC functions
 | 
			
		||||
  // this will create 2 kind of implementations
 | 
			
		||||
  // probably confusing the users
 | 
			
		||||
  // Now keeping only one class
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Move this elsewhere? FIXME
 | 
			
		||||
  static inline void AddLink(Field &U, LinkField &W,
 | 
			
		||||
                                  int mu) { // U[mu] += W
 | 
			
		||||
    PARALLEL_FOR_LOOP
 | 
			
		||||
    for (auto ss = 0; ss < U._grid->oSites(); ss++) {
 | 
			
		||||
      U._odata[ss]._internal[mu] =
 | 
			
		||||
          U._odata[ss]._internal[mu] + W._odata[ss]._internal;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////
 | 
			
		||||
  // Move these to another class
 | 
			
		||||
  // HMC auxiliary functions
 | 
			
		||||
  static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) {
 | 
			
		||||
    // specific for SU gauge fields
 | 
			
		||||
    LinkField Pmu(P._grid);
 | 
			
		||||
    Pmu = zero;
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
 | 
			
		||||
      PokeIndex<LorentzIndex>(P, Pmu, mu);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline Field projectForce(Field &P) { return Ta(P); }
 | 
			
		||||
 | 
			
		||||
  static inline void update_field(Field& P, Field& U, double ep){
 | 
			
		||||
    //static std::chrono::duration<double> diff;
 | 
			
		||||
 | 
			
		||||
    //auto start = std::chrono::high_resolution_clock::now();
 | 
			
		||||
    parallel_for(int ss=0;ss<P._grid->oSites();ss++){
 | 
			
		||||
      for (int mu = 0; mu < Nd; mu++) 
 | 
			
		||||
        U[ss]._internal[mu] = ProjectOnGroup(Exponentiate(P[ss]._internal[mu], ep, Nexp) * U[ss]._internal[mu]);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    //auto end = std::chrono::high_resolution_clock::now();
 | 
			
		||||
   // diff += end - start;
 | 
			
		||||
   // std::cout << "Time to exponentiate matrix " << diff.count() << " s\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline RealD FieldSquareNorm(Field& U){
 | 
			
		||||
    LatticeComplex Hloc(U._grid);
 | 
			
		||||
    Hloc = zero;
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      auto Umu = PeekIndex<LorentzIndex>(U, mu);
 | 
			
		||||
      Hloc += trace(Umu * Umu);
 | 
			
		||||
    }
 | 
			
		||||
    Complex Hsum = sum(Hloc);
 | 
			
		||||
    return Hsum.real();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
    SU<Nc>::HotConfiguration(pRNG, U);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
    SU<Nc>::TepidConfiguration(pRNG, U);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
    SU<Nc>::ColdConfiguration(pRNG, U);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
 | 
			
		||||
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
 | 
			
		||||
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
 | 
			
		||||
 | 
			
		||||
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
 | 
			
		||||
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
 | 
			
		||||
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
} // QCD
 | 
			
		||||
} // Grid
 | 
			
		||||
 | 
			
		||||
#endif // GRID_GAUGE_IMPL_TYPES_H
 | 
			
		||||
@@ -1,417 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 
 | 
			
		||||
 Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 
 | 
			
		||||
 Source file: ./lib/qcd/action/gauge/Photon.h
 | 
			
		||||
 
 | 
			
		||||
 Copyright (C) 2015
 | 
			
		||||
 
 | 
			
		||||
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 
 | 
			
		||||
 This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 it under the terms of the GNU General Public License as published by
 | 
			
		||||
 the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 (at your option) any later version.
 | 
			
		||||
 
 | 
			
		||||
 This program is distributed in the hope that it will be useful,
 | 
			
		||||
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 GNU General Public License for more details.
 | 
			
		||||
 
 | 
			
		||||
 You should have received a copy of the GNU General Public License along
 | 
			
		||||
 with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 
 | 
			
		||||
 See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
 *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef QCD_PHOTON_ACTION_H
 | 
			
		||||
#define QCD_PHOTON_ACTION_H
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
namespace QCD{
 | 
			
		||||
  template <class S>
 | 
			
		||||
  class QedGimpl
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef S Simd;
 | 
			
		||||
    
 | 
			
		||||
    template <typename vtype>
 | 
			
		||||
    using iImplGaugeLink  = iScalar<iScalar<iScalar<vtype>>>;
 | 
			
		||||
    template <typename vtype>
 | 
			
		||||
    using iImplGaugeField = iVector<iScalar<iScalar<vtype>>, Nd>;
 | 
			
		||||
    
 | 
			
		||||
    typedef iImplGaugeLink<Simd>  SiteLink;
 | 
			
		||||
    typedef iImplGaugeField<Simd> SiteField;
 | 
			
		||||
    typedef SiteField             SiteComplex;
 | 
			
		||||
    
 | 
			
		||||
    typedef Lattice<SiteLink>  LinkField;
 | 
			
		||||
    typedef Lattice<SiteField> Field;
 | 
			
		||||
    typedef Field              ComplexField;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  typedef QedGimpl<vComplex> QedGimplR;
 | 
			
		||||
  
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  class Photon
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
    GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3);
 | 
			
		||||
    GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2, qedInf, 3);
 | 
			
		||||
  public:
 | 
			
		||||
    Photon(Gauge gauge, ZmScheme zmScheme);
 | 
			
		||||
    Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements);
 | 
			
		||||
    Photon(Gauge gauge, ZmScheme zmScheme, Real G0);
 | 
			
		||||
    Photon(Gauge gauge, ZmScheme zmScheme, std::vector<Real> improvements, Real G0);
 | 
			
		||||
    virtual ~Photon(void) = default;
 | 
			
		||||
    void FreePropagator(const GaugeField &in, GaugeField &out);
 | 
			
		||||
    void MomentumSpacePropagator(const GaugeField &in, GaugeField &out);
 | 
			
		||||
    void StochasticWeight(GaugeLinkField &weight);
 | 
			
		||||
    void StochasticField(GaugeField &out, GridParallelRNG &rng);
 | 
			
		||||
    void StochasticField(GaugeField &out, GridParallelRNG &rng,
 | 
			
		||||
                         const GaugeLinkField &weight);
 | 
			
		||||
    void UnitField(GaugeField &out);
 | 
			
		||||
  private:
 | 
			
		||||
    void infVolPropagator(GaugeLinkField &out);
 | 
			
		||||
    void invKHatSquared(GaugeLinkField &out);
 | 
			
		||||
    void zmSub(GaugeLinkField &out);
 | 
			
		||||
  private:
 | 
			
		||||
    Gauge    gauge_;
 | 
			
		||||
    ZmScheme zmScheme_;
 | 
			
		||||
    std::vector<Real>  improvement_;
 | 
			
		||||
    Real     G0_;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  typedef Photon<QedGimplR>  PhotonR;
 | 
			
		||||
  
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme)
 | 
			
		||||
  : gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()),
 | 
			
		||||
    G0_(0.15493339023106021408483720810737508876916113364521)
 | 
			
		||||
  {}
 | 
			
		||||
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
 | 
			
		||||
                        std::vector<Real> improvements)
 | 
			
		||||
  : gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements),
 | 
			
		||||
    G0_(0.15493339023106021408483720810737508876916113364521)
 | 
			
		||||
  {}
 | 
			
		||||
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme, Real G0)
 | 
			
		||||
  : gauge_(gauge), zmScheme_(zmScheme), improvement_(std::vector<Real>()), G0_(G0)
 | 
			
		||||
  {}
 | 
			
		||||
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme,
 | 
			
		||||
                        std::vector<Real> improvements, Real G0)
 | 
			
		||||
  : gauge_(gauge), zmScheme_(zmScheme), improvement_(improvements), G0_(G0)
 | 
			
		||||
  {}
 | 
			
		||||
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out)
 | 
			
		||||
  {
 | 
			
		||||
    FFT theFFT(in._grid);
 | 
			
		||||
    
 | 
			
		||||
    GaugeField in_k(in._grid);
 | 
			
		||||
    GaugeField prop_k(in._grid);
 | 
			
		||||
    
 | 
			
		||||
    theFFT.FFT_all_dim(in_k,in,FFT::forward);
 | 
			
		||||
    MomentumSpacePropagator(prop_k,in_k);
 | 
			
		||||
    theFFT.FFT_all_dim(out,prop_k,FFT::backward);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::infVolPropagator(GaugeLinkField &out)
 | 
			
		||||
  {
 | 
			
		||||
    auto               *grid = dynamic_cast<GridCartesian *>(out._grid);
 | 
			
		||||
    LatticeReal        xmu(grid);
 | 
			
		||||
    GaugeLinkField     one(grid);
 | 
			
		||||
    const unsigned int nd    = grid->_ndimension;
 | 
			
		||||
    std::vector<int>   &l    = grid->_fdimensions;
 | 
			
		||||
    std::vector<int>   x0(nd,0);
 | 
			
		||||
    TComplex           Tone  = Complex(1.0,0.0);
 | 
			
		||||
    TComplex           Tzero = Complex(G0_,0.0);
 | 
			
		||||
    FFT                fft(grid);
 | 
			
		||||
    
 | 
			
		||||
    one = Complex(1.0,0.0);
 | 
			
		||||
    out = zero;
 | 
			
		||||
    for(int mu = 0; mu < nd; mu++)
 | 
			
		||||
    {
 | 
			
		||||
      LatticeCoordinate(xmu,mu);
 | 
			
		||||
      Real lo2 = l[mu]/2.0;
 | 
			
		||||
      xmu = where(xmu < lo2, xmu, xmu-double(l[mu]));
 | 
			
		||||
      out = out + toComplex(4*M_PI*M_PI*xmu*xmu);
 | 
			
		||||
    }
 | 
			
		||||
    pokeSite(Tone, out, x0);
 | 
			
		||||
    out = one/out;
 | 
			
		||||
    pokeSite(Tzero, out, x0);
 | 
			
		||||
    fft.FFT_all_dim(out, out, FFT::forward);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase           *grid = out._grid;
 | 
			
		||||
    GaugeLinkField     kmu(grid), one(grid);
 | 
			
		||||
    const unsigned int nd    = grid->_ndimension;
 | 
			
		||||
    std::vector<int>   &l    = grid->_fdimensions;
 | 
			
		||||
    std::vector<int>   zm(nd,0);
 | 
			
		||||
    TComplex           Tone = Complex(1.0,0.0);
 | 
			
		||||
    TComplex           Tzero= Complex(0.0,0.0);
 | 
			
		||||
    
 | 
			
		||||
    one = Complex(1.0,0.0);
 | 
			
		||||
    out = zero;
 | 
			
		||||
    for(int mu = 0; mu < nd; mu++)
 | 
			
		||||
    {
 | 
			
		||||
      Real twoPiL = M_PI*2./l[mu];
 | 
			
		||||
      
 | 
			
		||||
      LatticeCoordinate(kmu,mu);
 | 
			
		||||
      kmu = 2.*sin(.5*twoPiL*kmu);
 | 
			
		||||
      out = out + kmu*kmu;
 | 
			
		||||
    }
 | 
			
		||||
    pokeSite(Tone, out, zm);
 | 
			
		||||
    out = one/out;
 | 
			
		||||
    pokeSite(Tzero, out, zm);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::zmSub(GaugeLinkField &out)
 | 
			
		||||
  {
 | 
			
		||||
    GridBase           *grid = out._grid;
 | 
			
		||||
    const unsigned int nd    = grid->_ndimension;
 | 
			
		||||
    std::vector<int>   &l    = grid->_fdimensions;
 | 
			
		||||
    
 | 
			
		||||
    switch (zmScheme_)
 | 
			
		||||
    {
 | 
			
		||||
      case ZmScheme::qedTL:
 | 
			
		||||
      {
 | 
			
		||||
        std::vector<int> zm(nd,0);
 | 
			
		||||
        TComplex         Tzero = Complex(0.0,0.0);
 | 
			
		||||
        
 | 
			
		||||
        pokeSite(Tzero, out, zm);
 | 
			
		||||
        
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      case ZmScheme::qedL:
 | 
			
		||||
      {
 | 
			
		||||
        LatticeInteger spNrm(grid), coor(grid);
 | 
			
		||||
        GaugeLinkField z(grid);
 | 
			
		||||
        
 | 
			
		||||
        spNrm = zero;
 | 
			
		||||
        for(int d = 0; d < grid->_ndimension - 1; d++)
 | 
			
		||||
        {
 | 
			
		||||
          LatticeCoordinate(coor,d);
 | 
			
		||||
          coor = where(coor < Integer(l[d]/2), coor, coor-Integer(l[d]));
 | 
			
		||||
          spNrm = spNrm + coor*coor;
 | 
			
		||||
        }
 | 
			
		||||
        out = where(spNrm == Integer(0), 0.*out, out);
 | 
			
		||||
 | 
			
		||||
        // IR improvement
 | 
			
		||||
        for(int i = 0; i < improvement_.size(); i++)
 | 
			
		||||
        {
 | 
			
		||||
          Real f = sqrt(improvement_[i]+1);
 | 
			
		||||
          out = where(spNrm == Integer(i+1), f*out, out);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      default:
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in,
 | 
			
		||||
                                               GaugeField &out)
 | 
			
		||||
  {
 | 
			
		||||
  GridBase           *grid = out._grid;
 | 
			
		||||
    LatticeComplex     momProp(grid);
 | 
			
		||||
    
 | 
			
		||||
    switch (zmScheme_)
 | 
			
		||||
    {
 | 
			
		||||
      case ZmScheme::qedTL:
 | 
			
		||||
      case ZmScheme::qedL:
 | 
			
		||||
      {
 | 
			
		||||
        invKHatSquared(momProp);
 | 
			
		||||
        zmSub(momProp);
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      case ZmScheme::qedInf:
 | 
			
		||||
      {
 | 
			
		||||
        infVolPropagator(momProp);
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      default:
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    out = in*momProp;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::StochasticWeight(GaugeLinkField &weight)
 | 
			
		||||
  {
 | 
			
		||||
    auto               *grid     = dynamic_cast<GridCartesian *>(weight._grid);
 | 
			
		||||
    const unsigned int nd        = grid->_ndimension;
 | 
			
		||||
    std::vector<int>   latt_size = grid->_fdimensions;
 | 
			
		||||
    
 | 
			
		||||
    switch (zmScheme_)
 | 
			
		||||
    {
 | 
			
		||||
      case ZmScheme::qedTL:
 | 
			
		||||
      case ZmScheme::qedL:
 | 
			
		||||
      {
 | 
			
		||||
        Integer vol = 1;
 | 
			
		||||
        for(int d = 0; d < nd; d++)
 | 
			
		||||
        {
 | 
			
		||||
          vol = vol * latt_size[d];
 | 
			
		||||
        }
 | 
			
		||||
        invKHatSquared(weight);
 | 
			
		||||
        weight = sqrt(vol)*sqrt(weight);
 | 
			
		||||
        zmSub(weight);
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      case ZmScheme::qedInf:
 | 
			
		||||
      {
 | 
			
		||||
        infVolPropagator(weight);
 | 
			
		||||
        weight = sqrt(real(weight));
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      default:
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
 | 
			
		||||
  {
 | 
			
		||||
    auto           *grid = dynamic_cast<GridCartesian *>(out._grid);
 | 
			
		||||
    GaugeLinkField weight(grid);
 | 
			
		||||
    
 | 
			
		||||
    StochasticWeight(weight);
 | 
			
		||||
    StochasticField(out, rng, weight);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng,
 | 
			
		||||
                                      const GaugeLinkField &weight)
 | 
			
		||||
  {
 | 
			
		||||
    auto               *grid = dynamic_cast<GridCartesian *>(out._grid);
 | 
			
		||||
    const unsigned int nd = grid->_ndimension;
 | 
			
		||||
    GaugeLinkField     r(grid);
 | 
			
		||||
    GaugeField         aTilde(grid);
 | 
			
		||||
    FFT                fft(grid);
 | 
			
		||||
    
 | 
			
		||||
    switch (zmScheme_)
 | 
			
		||||
    {
 | 
			
		||||
      case ZmScheme::qedTL:
 | 
			
		||||
      case ZmScheme::qedL:
 | 
			
		||||
      {
 | 
			
		||||
        for(int mu = 0; mu < nd; mu++)
 | 
			
		||||
        {
 | 
			
		||||
          gaussian(rng, r);
 | 
			
		||||
          r = weight*r;
 | 
			
		||||
          pokeLorentz(aTilde, r, mu);
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      case ZmScheme::qedInf:
 | 
			
		||||
      {
 | 
			
		||||
        Complex                    shift(1., 1.); // This needs to be a GaugeLink element?
 | 
			
		||||
        for(int mu = 0; mu < nd; mu++)
 | 
			
		||||
        {
 | 
			
		||||
          bernoulli(rng, r);
 | 
			
		||||
          r = weight*(2.*r - shift);
 | 
			
		||||
          pokeLorentz(aTilde, r, mu);
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      default:
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fft.FFT_all_dim(out, aTilde, FFT::backward);
 | 
			
		||||
    
 | 
			
		||||
    out = real(out);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class Gimpl>
 | 
			
		||||
  void Photon<Gimpl>::UnitField(GaugeField &out)
 | 
			
		||||
  {
 | 
			
		||||
    auto               *grid = dynamic_cast<GridCartesian *>(out._grid);
 | 
			
		||||
    const unsigned int nd = grid->_ndimension;
 | 
			
		||||
    GaugeLinkField     r(grid);
 | 
			
		||||
    
 | 
			
		||||
    r = Complex(1.0,0.0);
 | 
			
		||||
 | 
			
		||||
    for(int mu = 0; mu < nd; mu++)
 | 
			
		||||
    {
 | 
			
		||||
      pokeLorentz(out, r, mu);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    out = real(out);
 | 
			
		||||
  }
 | 
			
		||||
//  template<class Gimpl>
 | 
			
		||||
//  void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out,
 | 
			
		||||
//                                                            const GaugeField &in)
 | 
			
		||||
//  {
 | 
			
		||||
//    
 | 
			
		||||
//    FeynmanGaugeMomentumSpacePropagator_TL(out,in);
 | 
			
		||||
//    
 | 
			
		||||
//    GridBase *grid = out._grid;
 | 
			
		||||
//    LatticeInteger     coor(grid);
 | 
			
		||||
//    GaugeField zz(grid); zz=zero;
 | 
			
		||||
//    
 | 
			
		||||
//    // xyzt
 | 
			
		||||
//    for(int d = 0; d < grid->_ndimension-1;d++){
 | 
			
		||||
//      LatticeCoordinate(coor,d);
 | 
			
		||||
//      out = where(coor==Integer(0),zz,out);
 | 
			
		||||
//    }
 | 
			
		||||
//  }
 | 
			
		||||
//  
 | 
			
		||||
//  template<class Gimpl>
 | 
			
		||||
//  void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out,
 | 
			
		||||
//                                                             const GaugeField &in)
 | 
			
		||||
//  {
 | 
			
		||||
//    
 | 
			
		||||
//    // what type LatticeComplex
 | 
			
		||||
//    GridBase *grid = out._grid;
 | 
			
		||||
//    int nd = grid->_ndimension;
 | 
			
		||||
//    
 | 
			
		||||
//    typedef typename GaugeField::vector_type vector_type;
 | 
			
		||||
//    typedef typename GaugeField::scalar_type ScalComplex;
 | 
			
		||||
//    typedef Lattice<iSinglet<vector_type> > LatComplex;
 | 
			
		||||
//    
 | 
			
		||||
//    std::vector<int> latt_size   = grid->_fdimensions;
 | 
			
		||||
//    
 | 
			
		||||
//    LatComplex denom(grid); denom= zero;
 | 
			
		||||
//    LatComplex   one(grid); one = ScalComplex(1.0,0.0);
 | 
			
		||||
//    LatComplex   kmu(grid);
 | 
			
		||||
//    
 | 
			
		||||
//    ScalComplex ci(0.0,1.0);
 | 
			
		||||
//    // momphase = n * 2pi / L
 | 
			
		||||
//    for(int mu=0;mu<Nd;mu++) {
 | 
			
		||||
//      
 | 
			
		||||
//      LatticeCoordinate(kmu,mu);
 | 
			
		||||
//      
 | 
			
		||||
//      RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
//      
 | 
			
		||||
//      kmu = TwoPiL * kmu ;
 | 
			
		||||
//      
 | 
			
		||||
//      denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
 | 
			
		||||
//    }
 | 
			
		||||
//    std::vector<int> zero_mode(nd,0);
 | 
			
		||||
//    TComplexD Tone = ComplexD(1.0,0.0);
 | 
			
		||||
//    TComplexD Tzero= ComplexD(0.0,0.0);
 | 
			
		||||
//    
 | 
			
		||||
//    pokeSite(Tone,denom,zero_mode);
 | 
			
		||||
//    
 | 
			
		||||
//    denom= one/denom;
 | 
			
		||||
//    
 | 
			
		||||
//    pokeSite(Tzero,denom,zero_mode);
 | 
			
		||||
//    
 | 
			
		||||
//    out = zero;
 | 
			
		||||
//    out = in*denom;
 | 
			
		||||
//  };
 | 
			
		||||
  
 | 
			
		||||
}}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,95 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef QCD_WILSON_GAUGE_ACTION_H
 | 
			
		||||
#define QCD_WILSON_GAUGE_ACTION_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Wilson Gauge Action .. should I template the Nc etc..
 | 
			
		||||
////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> {
 | 
			
		||||
 public:  
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
 | 
			
		||||
  /////////////////////////// constructors
 | 
			
		||||
  explicit WilsonGaugeAction(RealD beta_):beta(beta_){};
 | 
			
		||||
 | 
			
		||||
  virtual std::string action_name() {return "WilsonGaugeAction";}
 | 
			
		||||
 | 
			
		||||
  virtual std::string LogParameters(){
 | 
			
		||||
    std::stringstream sstream;
 | 
			
		||||
    sstream << GridLogMessage << "[WilsonGaugeAction] Beta: " << beta << std::endl;
 | 
			
		||||
    return sstream.str();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void refresh(const GaugeField &U,
 | 
			
		||||
                       GridParallelRNG &pRNG){};  // noop as no pseudoferms
 | 
			
		||||
 | 
			
		||||
  virtual RealD S(const GaugeField &U) {
 | 
			
		||||
    RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(U);
 | 
			
		||||
    RealD vol = U._grid->gSites();
 | 
			
		||||
    RealD action = beta * (1.0 - plaq) * (Nd * (Nd - 1.0)) * vol * 0.5;
 | 
			
		||||
    return action;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
 | 
			
		||||
    // not optimal implementation FIXME
 | 
			
		||||
    // extend Ta to include Lorentz indexes
 | 
			
		||||
 | 
			
		||||
    RealD factor = 0.5 * beta / RealD(Nc);
 | 
			
		||||
 | 
			
		||||
    GaugeLinkField Umu(U._grid);
 | 
			
		||||
    GaugeLinkField dSdU_mu(U._grid);
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      Umu = PeekIndex<LorentzIndex>(U, mu);
 | 
			
		||||
 | 
			
		||||
      // Staple in direction mu
 | 
			
		||||
      WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu);
 | 
			
		||||
      dSdU_mu = Ta(Umu * dSdU_mu) * factor;
 | 
			
		||||
 | 
			
		||||
      PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
private:
 | 
			
		||||
  RealD beta;  
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,145 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef QCD_EVEN_ODD_SCHUR_DIFFERENTIABLE_H
 | 
			
		||||
#define QCD_EVEN_ODD_SCHUR_DIFFERENTIABLE_H
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
  namespace QCD{
 | 
			
		||||
 | 
			
		||||
    // Base even odd HMC on the normal Mee based schur decomposition.
 | 
			
		||||
    //
 | 
			
		||||
    //     M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo)
 | 
			
		||||
    //         (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         )
 | 
			
		||||
    //
 | 
			
		||||
    // Determinant is det of middle factor
 | 
			
		||||
    // This assumes Mee is indept of U.
 | 
			
		||||
    //
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    class SchurDifferentiableOperator :  public SchurDiagMooeeOperator<FermionOperator<Impl>,typename Impl::FermionField> 
 | 
			
		||||
      {
 | 
			
		||||
      public:
 | 
			
		||||
      INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
        typedef FermionOperator<Impl> Matrix;
 | 
			
		||||
 | 
			
		||||
        SchurDifferentiableOperator (Matrix &Mat) : SchurDiagMooeeOperator<Matrix,FermionField>(Mat) {};
 | 
			
		||||
 | 
			
		||||
        void MpcDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) {
 | 
			
		||||
        
 | 
			
		||||
          GridBase *fgrid   = this->_Mat.FermionGrid();
 | 
			
		||||
          GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid();
 | 
			
		||||
 | 
			
		||||
          FermionField tmp1(fcbgrid);
 | 
			
		||||
          FermionField tmp2(fcbgrid);
 | 
			
		||||
 | 
			
		||||
          conformable(fcbgrid,U._grid);
 | 
			
		||||
          conformable(fcbgrid,V._grid);
 | 
			
		||||
 | 
			
		||||
          // Assert the checkerboard?? or code for either
 | 
			
		||||
          assert(U.checkerboard==Odd);
 | 
			
		||||
          assert(V.checkerboard==U.checkerboard);
 | 
			
		||||
 | 
			
		||||
          // NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE
 | 
			
		||||
          // it is not conformable with the HMC force field
 | 
			
		||||
	  // Case: Ls vectorised fields
 | 
			
		||||
          // INHERIT FROM THE Force field instead
 | 
			
		||||
          GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid);
 | 
			
		||||
          GaugeField ForceO(forcecb);
 | 
			
		||||
          GaugeField ForceE(forcecb);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
          //  X^dag Der_oe MeeInv Meo Y
 | 
			
		||||
          // Use Mooee as nontrivial but gauge field indept
 | 
			
		||||
          this->_Mat.Meooe   (V,tmp1);      // odd->even -- implicit -0.5 factor to be applied
 | 
			
		||||
	  this->_Mat.MooeeInv(tmp1,tmp2);   // even->even 
 | 
			
		||||
          this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerNo);
 | 
			
		||||
          //  Accumulate X^dag M_oe MeeInv Der_eo Y
 | 
			
		||||
          this->_Mat.MeooeDag   (U,tmp1);    // even->odd -- implicit -0.5 factor to be applied
 | 
			
		||||
          this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even 
 | 
			
		||||
          this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerNo);
 | 
			
		||||
          
 | 
			
		||||
          assert(ForceE.checkerboard==Even);
 | 
			
		||||
          assert(ForceO.checkerboard==Odd);
 | 
			
		||||
 | 
			
		||||
          setCheckerboard(Force,ForceE); 
 | 
			
		||||
          setCheckerboard(Force,ForceO);
 | 
			
		||||
          Force=-Force;
 | 
			
		||||
 | 
			
		||||
          delete forcecb;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        void MpcDagDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) {
 | 
			
		||||
        
 | 
			
		||||
          GridBase *fgrid   = this->_Mat.FermionGrid();
 | 
			
		||||
          GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid();
 | 
			
		||||
 | 
			
		||||
          FermionField tmp1(fcbgrid);
 | 
			
		||||
          FermionField tmp2(fcbgrid);
 | 
			
		||||
 | 
			
		||||
          conformable(fcbgrid,U._grid);
 | 
			
		||||
          conformable(fcbgrid,V._grid);
 | 
			
		||||
 | 
			
		||||
          // Assert the checkerboard?? or code for either
 | 
			
		||||
          assert(V.checkerboard==Odd);
 | 
			
		||||
          assert(V.checkerboard==V.checkerboard);
 | 
			
		||||
 | 
			
		||||
          // NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE
 | 
			
		||||
          // it is not conformable with the HMC force field
 | 
			
		||||
          // INHERIT FROM THE Force field instead
 | 
			
		||||
	  GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid);
 | 
			
		||||
          GaugeField ForceO(forcecb);
 | 
			
		||||
          GaugeField ForceE(forcecb);
 | 
			
		||||
 | 
			
		||||
          //  X^dag Der_oe MeeInv Meo Y
 | 
			
		||||
          // Use Mooee as nontrivial but gauge field indept
 | 
			
		||||
          this->_Mat.MeooeDag   (V,tmp1);      // odd->even -- implicit -0.5 factor to be applied
 | 
			
		||||
          this->_Mat.MooeeInvDag(tmp1,tmp2);   // even->even 
 | 
			
		||||
          this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerYes);
 | 
			
		||||
          
 | 
			
		||||
          //  Accumulate X^dag M_oe MeeInv Der_eo Y
 | 
			
		||||
          this->_Mat.Meooe   (U,tmp1);    // even->odd -- implicit -0.5 factor to be applied
 | 
			
		||||
          this->_Mat.MooeeInv(tmp1,tmp2); // even->even 
 | 
			
		||||
          this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerYes);
 | 
			
		||||
 | 
			
		||||
          assert(ForceE.checkerboard==Even);
 | 
			
		||||
          assert(ForceO.checkerboard==Odd);
 | 
			
		||||
 | 
			
		||||
          setCheckerboard(Force,ForceE); 
 | 
			
		||||
          setCheckerboard(Force,ForceO);
 | 
			
		||||
          Force=-Force;
 | 
			
		||||
 | 
			
		||||
          delete forcecb;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,264 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/pseudofermion/ExactOneFlavourRatio.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: David Murphy <dmurphy@phys.columbia.edu>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////
 | 
			
		||||
// Implementation of exact one flavour algorithm (EOFA)         //
 | 
			
		||||
// using fermion classes defined in:                           //
 | 
			
		||||
//    Grid/qcd/action/fermion/DomainWallEOFAFermion.h (Shamir) //
 | 
			
		||||
//    Grid/qcd/action/fermion/MobiusEOFAFermion.h (Mobius)     //
 | 
			
		||||
// arXiv: 1403.1683, 1706.05843                                //
 | 
			
		||||
/////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
#ifndef QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H
 | 
			
		||||
#define QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
namespace QCD{
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  // Exact one flavour implementation of DWF determinant ratio //
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  template<class Impl>
 | 
			
		||||
  class ExactOneFlavourRatioPseudoFermionAction : public Action<typename Impl::GaugeField>
 | 
			
		||||
  {
 | 
			
		||||
    public:
 | 
			
		||||
      INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
      typedef OneFlavourRationalParams Params;
 | 
			
		||||
      Params param;
 | 
			
		||||
      MultiShiftFunction PowerNegHalf;
 | 
			
		||||
 | 
			
		||||
    private:
 | 
			
		||||
      bool use_heatbath_forecasting;
 | 
			
		||||
      AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
 | 
			
		||||
      AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
 | 
			
		||||
      SchurRedBlackDiagMooeeSolve<FermionField> Solver;
 | 
			
		||||
      FermionField Phi; // the pseudofermion field for this trajectory
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
      ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop,
 | 
			
		||||
        OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), Solver(S),
 | 
			
		||||
        Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc)
 | 
			
		||||
      {
 | 
			
		||||
        AlgRemez remez(param.lo, param.hi, param.precision);
 | 
			
		||||
 | 
			
		||||
        // MdagM^(+- 1/2)
 | 
			
		||||
        std::cout << GridLogMessage << "Generating degree " << param.degree << " for x^(-1/2)" << std::endl;
 | 
			
		||||
        remez.generateApprox(param.degree, 1, 2);
 | 
			
		||||
        PowerNegHalf.Init(remez, param.tolerance, true);
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      virtual std::string action_name() { return "ExactOneFlavourRatioPseudoFermionAction"; }
 | 
			
		||||
 | 
			
		||||
      virtual std::string LogParameters() {
 | 
			
		||||
        std::stringstream sstream;
 | 
			
		||||
        sstream << GridLogMessage << "[" << action_name() << "] Low            :" << param.lo << std::endl;
 | 
			
		||||
        sstream << GridLogMessage << "[" << action_name() << "] High           :" << param.hi << std::endl;
 | 
			
		||||
        sstream << GridLogMessage << "[" << action_name() << "] Max iterations :" << param.MaxIter << std::endl;
 | 
			
		||||
        sstream << GridLogMessage << "[" << action_name() << "] Tolerance      :" << param.tolerance << std::endl;
 | 
			
		||||
        sstream << GridLogMessage << "[" << action_name() << "] Degree         :" << param.degree << std::endl;
 | 
			
		||||
        sstream << GridLogMessage << "[" << action_name() << "] Precision      :" << param.precision << std::endl;
 | 
			
		||||
        return sstream.str();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // Spin projection
 | 
			
		||||
      void spProj(const FermionField& in, FermionField& out, int sign, int Ls)
 | 
			
		||||
      {
 | 
			
		||||
        if(sign == 1){ for(int s=0; s<Ls; ++s){ axpby_ssp_pplus(out, 0.0, in, 1.0, in, s, s); } }
 | 
			
		||||
        else{ for(int s=0; s<Ls; ++s){ axpby_ssp_pminus(out, 0.0, in, 1.0, in, s, s); } }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // EOFA heatbath: see Eqn. (29) of arXiv:1706.05843
 | 
			
		||||
      // We generate a Gaussian noise vector \eta, and then compute
 | 
			
		||||
      //  \Phi = M_{\rm EOFA}^{-1/2} * \eta
 | 
			
		||||
      // using a rational approximation to the inverse square root
 | 
			
		||||
      virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG)
 | 
			
		||||
      {
 | 
			
		||||
        Lop.ImportGauge(U);
 | 
			
		||||
        Rop.ImportGauge(U);
 | 
			
		||||
 | 
			
		||||
        FermionField eta         (Lop.FermionGrid());
 | 
			
		||||
        FermionField CG_src      (Lop.FermionGrid());
 | 
			
		||||
        FermionField CG_soln     (Lop.FermionGrid());
 | 
			
		||||
        FermionField Forecast_src(Lop.FermionGrid());
 | 
			
		||||
        std::vector<FermionField> tmp(2, Lop.FermionGrid());
 | 
			
		||||
 | 
			
		||||
        // Use chronological inverter to forecast solutions across poles
 | 
			
		||||
        std::vector<FermionField> prev_solns;
 | 
			
		||||
        if(use_heatbath_forecasting){ prev_solns.reserve(param.degree); }
 | 
			
		||||
        ChronoForecast<AbstractEOFAFermion<Impl>, FermionField> Forecast;
 | 
			
		||||
 | 
			
		||||
        // Seed with Gaussian noise vector (var = 0.5)
 | 
			
		||||
        RealD scale = std::sqrt(0.5);
 | 
			
		||||
        gaussian(pRNG,eta);
 | 
			
		||||
        eta = eta * scale;
 | 
			
		||||
        printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta));
 | 
			
		||||
 | 
			
		||||
        // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
 | 
			
		||||
        RealD N(PowerNegHalf.norm);
 | 
			
		||||
        for(int k=0; k<param.degree; ++k){ N += PowerNegHalf.residues[k] / ( 1.0 + PowerNegHalf.poles[k] ); }
 | 
			
		||||
        Phi = eta * N;
 | 
			
		||||
 | 
			
		||||
        // LH terms:
 | 
			
		||||
        // \Phi = \Phi + k \sum_{k=1}^{N_{p}} P_{-} \Omega_{-}^{\dagger} ( H(mf)
 | 
			
		||||
        //          - \gamma_{l} \Delta_{-}(mf,mb) P_{-} )^{-1} \Omega_{-} P_{-} \eta
 | 
			
		||||
        RealD gamma_l(0.0);
 | 
			
		||||
        spProj(eta, tmp[0], -1, Lop.Ls);
 | 
			
		||||
        Lop.Omega(tmp[0], tmp[1], -1, 0);
 | 
			
		||||
        G5R5(CG_src, tmp[1]);
 | 
			
		||||
        tmp[1] = zero;
 | 
			
		||||
        for(int k=0; k<param.degree; ++k){
 | 
			
		||||
          gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
 | 
			
		||||
          Lop.RefreshShiftCoefficients(-gamma_l);
 | 
			
		||||
          if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
 | 
			
		||||
            Lop.Mdag(CG_src, Forecast_src);
 | 
			
		||||
            CG_soln = Forecast(Lop, Forecast_src, prev_solns);
 | 
			
		||||
            Solver(Lop, CG_src, CG_soln);
 | 
			
		||||
            prev_solns.push_back(CG_soln);
 | 
			
		||||
          } else {
 | 
			
		||||
            CG_soln = zero; // Just use zero as the initial guess
 | 
			
		||||
            Solver(Lop, CG_src, CG_soln);
 | 
			
		||||
          }
 | 
			
		||||
          Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
 | 
			
		||||
          tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
 | 
			
		||||
        }
 | 
			
		||||
        Lop.Omega(tmp[1], tmp[0], -1, 1);
 | 
			
		||||
        spProj(tmp[0], tmp[1], -1, Lop.Ls);
 | 
			
		||||
        Phi = Phi + tmp[1];
 | 
			
		||||
 | 
			
		||||
        // RH terms:
 | 
			
		||||
        // \Phi = \Phi - k \sum_{k=1}^{N_{p}} P_{+} \Omega_{+}^{\dagger} ( H(mb)
 | 
			
		||||
        //          + \gamma_{l} \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \eta
 | 
			
		||||
        spProj(eta, tmp[0], 1, Rop.Ls);
 | 
			
		||||
        Rop.Omega(tmp[0], tmp[1], 1, 0);
 | 
			
		||||
        G5R5(CG_src, tmp[1]);
 | 
			
		||||
        tmp[1] = zero;
 | 
			
		||||
        if(use_heatbath_forecasting){ prev_solns.clear(); } // empirically, LH solns don't help for RH solves
 | 
			
		||||
        for(int k=0; k<param.degree; ++k){
 | 
			
		||||
          gamma_l = 1.0 / ( 1.0 + PowerNegHalf.poles[k] );
 | 
			
		||||
          Rop.RefreshShiftCoefficients(-gamma_l*PowerNegHalf.poles[k]);
 | 
			
		||||
          if(use_heatbath_forecasting){
 | 
			
		||||
            Rop.Mdag(CG_src, Forecast_src);
 | 
			
		||||
            CG_soln = Forecast(Rop, Forecast_src, prev_solns);
 | 
			
		||||
            Solver(Rop, CG_src, CG_soln);
 | 
			
		||||
            prev_solns.push_back(CG_soln);
 | 
			
		||||
          } else {
 | 
			
		||||
            CG_soln = zero;
 | 
			
		||||
            Solver(Rop, CG_src, CG_soln);
 | 
			
		||||
          }
 | 
			
		||||
          Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
 | 
			
		||||
          tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
 | 
			
		||||
        }
 | 
			
		||||
        Rop.Omega(tmp[1], tmp[0], 1, 1);
 | 
			
		||||
        spProj(tmp[0], tmp[1], 1, Rop.Ls);
 | 
			
		||||
        Phi = Phi + tmp[1];
 | 
			
		||||
 | 
			
		||||
        // Reset shift coefficients for energy and force evals
 | 
			
		||||
        Lop.RefreshShiftCoefficients(0.0);
 | 
			
		||||
        Rop.RefreshShiftCoefficients(-1.0);
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      // EOFA action: see Eqn. (10) of arXiv:1706.05843
 | 
			
		||||
      virtual RealD S(const GaugeField& U)
 | 
			
		||||
      {
 | 
			
		||||
        Lop.ImportGauge(U);
 | 
			
		||||
        Rop.ImportGauge(U);
 | 
			
		||||
 | 
			
		||||
        FermionField spProj_Phi(Lop.FermionGrid());
 | 
			
		||||
        std::vector<FermionField> tmp(2, Lop.FermionGrid());
 | 
			
		||||
 | 
			
		||||
        // S = <\Phi|\Phi>
 | 
			
		||||
        RealD action(norm2(Phi));
 | 
			
		||||
 | 
			
		||||
        // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
 | 
			
		||||
        spProj(Phi, spProj_Phi, -1, Lop.Ls);
 | 
			
		||||
        Lop.Omega(spProj_Phi, tmp[0], -1, 0);
 | 
			
		||||
        G5R5(tmp[1], tmp[0]);
 | 
			
		||||
        tmp[0] = zero;
 | 
			
		||||
        Solver(Lop, tmp[1], tmp[0]);
 | 
			
		||||
        Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
 | 
			
		||||
        Lop.Omega(tmp[1], tmp[0], -1, 1);
 | 
			
		||||
        action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
 | 
			
		||||
 | 
			
		||||
        // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
 | 
			
		||||
        //               - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
 | 
			
		||||
        spProj(Phi, spProj_Phi, 1, Rop.Ls);
 | 
			
		||||
        Rop.Omega(spProj_Phi, tmp[0], 1, 0);
 | 
			
		||||
        G5R5(tmp[1], tmp[0]);
 | 
			
		||||
        tmp[0] = zero;
 | 
			
		||||
        Solver(Rop, tmp[1], tmp[0]);
 | 
			
		||||
        Rop.Dtilde(tmp[0], tmp[1]);
 | 
			
		||||
        Rop.Omega(tmp[1], tmp[0], 1, 1);
 | 
			
		||||
        action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
 | 
			
		||||
 | 
			
		||||
        return action;
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      // EOFA pseudofermion force: see Eqns. (34)-(36) of arXiv:1706.05843
 | 
			
		||||
      virtual void deriv(const GaugeField& U, GaugeField& dSdU)
 | 
			
		||||
      {
 | 
			
		||||
        Lop.ImportGauge(U);
 | 
			
		||||
        Rop.ImportGauge(U);
 | 
			
		||||
 | 
			
		||||
        FermionField spProj_Phi      (Lop.FermionGrid());
 | 
			
		||||
        FermionField Omega_spProj_Phi(Lop.FermionGrid());
 | 
			
		||||
        FermionField CG_src          (Lop.FermionGrid());
 | 
			
		||||
        FermionField Chi             (Lop.FermionGrid());
 | 
			
		||||
        FermionField g5_R5_Chi       (Lop.FermionGrid());
 | 
			
		||||
 | 
			
		||||
        GaugeField force(Lop.GaugeGrid());
 | 
			
		||||
 | 
			
		||||
        // LH: dSdU = k \chi_{L}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{L}
 | 
			
		||||
        //     \chi_{L} = H(mf)^{-1} \Omega_{-} P_{-} \Phi
 | 
			
		||||
        spProj(Phi, spProj_Phi, -1, Lop.Ls);
 | 
			
		||||
        Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
 | 
			
		||||
        G5R5(CG_src, Omega_spProj_Phi);
 | 
			
		||||
        spProj_Phi = zero;
 | 
			
		||||
        Solver(Lop, CG_src, spProj_Phi);
 | 
			
		||||
        Lop.Dtilde(spProj_Phi, Chi);
 | 
			
		||||
        G5R5(g5_R5_Chi, Chi);
 | 
			
		||||
        Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
 | 
			
		||||
        dSdU = Lop.k * force;
 | 
			
		||||
 | 
			
		||||
        // RH: dSdU = dSdU - k \chi_{R}^{\dagger} \gamma_{5} R_{5} ( \partial_{x,\mu} D_{w} ) \chi_{}
 | 
			
		||||
        //     \chi_{R} = ( H(mb) - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{+} P_{+} \Phi
 | 
			
		||||
        spProj(Phi, spProj_Phi, 1, Rop.Ls);
 | 
			
		||||
        Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
 | 
			
		||||
        G5R5(CG_src, Omega_spProj_Phi);
 | 
			
		||||
        spProj_Phi = zero;
 | 
			
		||||
        Solver(Rop, CG_src, spProj_Phi);
 | 
			
		||||
        Rop.Dtilde(spProj_Phi, Chi);
 | 
			
		||||
        G5R5(g5_R5_Chi, Chi);
 | 
			
		||||
        Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
 | 
			
		||||
        dSdU = dSdU - Rop.k * force;
 | 
			
		||||
      };
 | 
			
		||||
  };
 | 
			
		||||
}}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,209 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_RATIO_H
 | 
			
		||||
#define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_RATIO_H
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
  namespace QCD{
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
    // Two flavour ratio
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    class TwoFlavourEvenOddRatioPseudoFermionAction : public Action<typename Impl::GaugeField> {
 | 
			
		||||
    public:
 | 
			
		||||
      INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
    private:
 | 
			
		||||
      FermionOperator<Impl> & NumOp;// the basic operator
 | 
			
		||||
      FermionOperator<Impl> & DenOp;// the basic operator
 | 
			
		||||
 | 
			
		||||
      OperatorFunction<FermionField> &DerivativeSolver;
 | 
			
		||||
      OperatorFunction<FermionField> &ActionSolver;
 | 
			
		||||
 | 
			
		||||
      FermionField PhiOdd;   // the pseudo fermion field for this trajectory
 | 
			
		||||
      FermionField PhiEven;  // the pseudo fermion field for this trajectory
 | 
			
		||||
 | 
			
		||||
    public:
 | 
			
		||||
      TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl>  &_NumOp, 
 | 
			
		||||
                                                FermionOperator<Impl>  &_DenOp, 
 | 
			
		||||
                                                OperatorFunction<FermionField> & DS,
 | 
			
		||||
                                                OperatorFunction<FermionField> & AS) :
 | 
			
		||||
      NumOp(_NumOp), 
 | 
			
		||||
      DenOp(_DenOp), 
 | 
			
		||||
      DerivativeSolver(DS), 
 | 
			
		||||
      ActionSolver(AS),
 | 
			
		||||
      PhiEven(_NumOp.FermionRedBlackGrid()),
 | 
			
		||||
      PhiOdd(_NumOp.FermionRedBlackGrid()) 
 | 
			
		||||
        {
 | 
			
		||||
          conformable(_NumOp.FermionGrid(), _DenOp.FermionGrid());
 | 
			
		||||
          conformable(_NumOp.FermionRedBlackGrid(), _DenOp.FermionRedBlackGrid());
 | 
			
		||||
          conformable(_NumOp.GaugeGrid(), _DenOp.GaugeGrid());
 | 
			
		||||
          conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid());
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
      virtual std::string action_name(){return "TwoFlavourEvenOddRatioPseudoFermionAction";}
 | 
			
		||||
 | 
			
		||||
      virtual std::string LogParameters(){
 | 
			
		||||
	std::stringstream sstream;
 | 
			
		||||
	sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
 | 
			
		||||
	return sstream.str();
 | 
			
		||||
      } 
 | 
			
		||||
 | 
			
		||||
      
 | 
			
		||||
      virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
 | 
			
		||||
 | 
			
		||||
        // P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
 | 
			
		||||
        //
 | 
			
		||||
        // NumOp == V
 | 
			
		||||
        // DenOp == M
 | 
			
		||||
        //
 | 
			
		||||
        // Take phi_o = Vpcdag^{-1} Mpcdag eta_o  ; eta_o = Mpcdag^{-1} Vpcdag Phi
 | 
			
		||||
        //
 | 
			
		||||
        // P(eta_o) = e^{- eta_o^dag eta_o}
 | 
			
		||||
        //
 | 
			
		||||
        // e^{x^2/2 sig^2} => sig^2 = 0.5.
 | 
			
		||||
        // 
 | 
			
		||||
        RealD scale = std::sqrt(0.5);
 | 
			
		||||
 | 
			
		||||
        FermionField eta    (NumOp.FermionGrid());
 | 
			
		||||
        FermionField etaOdd (NumOp.FermionRedBlackGrid());
 | 
			
		||||
        FermionField etaEven(NumOp.FermionRedBlackGrid());
 | 
			
		||||
        FermionField tmp    (NumOp.FermionRedBlackGrid());
 | 
			
		||||
 | 
			
		||||
        gaussian(pRNG,eta);
 | 
			
		||||
 | 
			
		||||
        pickCheckerboard(Even,etaEven,eta);
 | 
			
		||||
        pickCheckerboard(Odd,etaOdd,eta);
 | 
			
		||||
 | 
			
		||||
        NumOp.ImportGauge(U);
 | 
			
		||||
        DenOp.ImportGauge(U);
 | 
			
		||||
 | 
			
		||||
        SchurDifferentiableOperator<Impl> Mpc(DenOp);
 | 
			
		||||
        SchurDifferentiableOperator<Impl> Vpc(NumOp);
 | 
			
		||||
 | 
			
		||||
        // Odd det factors
 | 
			
		||||
        Mpc.MpcDag(etaOdd,PhiOdd);
 | 
			
		||||
        tmp=zero;
 | 
			
		||||
        ActionSolver(Vpc,PhiOdd,tmp);
 | 
			
		||||
        Vpc.Mpc(tmp,PhiOdd);            
 | 
			
		||||
 | 
			
		||||
        // Even det factors
 | 
			
		||||
        DenOp.MooeeDag(etaEven,tmp);
 | 
			
		||||
        NumOp.MooeeInvDag(tmp,PhiEven);
 | 
			
		||||
 | 
			
		||||
        PhiOdd =PhiOdd*scale;
 | 
			
		||||
        PhiEven=PhiEven*scale;
 | 
			
		||||
        
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////
 | 
			
		||||
      // S = phi^dag V (Mdag M)^-1 Vdag phi
 | 
			
		||||
      //////////////////////////////////////////////////////
 | 
			
		||||
      virtual RealD S(const GaugeField &U) {
 | 
			
		||||
 | 
			
		||||
        NumOp.ImportGauge(U);
 | 
			
		||||
        DenOp.ImportGauge(U);
 | 
			
		||||
 | 
			
		||||
        SchurDifferentiableOperator<Impl> Mpc(DenOp);
 | 
			
		||||
        SchurDifferentiableOperator<Impl> Vpc(NumOp);
 | 
			
		||||
 | 
			
		||||
        FermionField X(NumOp.FermionRedBlackGrid());
 | 
			
		||||
        FermionField Y(NumOp.FermionRedBlackGrid());
 | 
			
		||||
 | 
			
		||||
        Vpc.MpcDag(PhiOdd,Y);           // Y= Vdag phi
 | 
			
		||||
        X=zero;
 | 
			
		||||
        ActionSolver(Mpc,Y,X);          // X= (MdagM)^-1 Vdag phi
 | 
			
		||||
        //Mpc.Mpc(X,Y);                   // Y=  Mdag^-1 Vdag phi
 | 
			
		||||
        // Multiply by Ydag
 | 
			
		||||
        RealD action = real(innerProduct(Y,X));
 | 
			
		||||
 | 
			
		||||
        //RealD action = norm2(Y);
 | 
			
		||||
 | 
			
		||||
        // The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
 | 
			
		||||
        // Only really clover term that creates this. Leave the EE portion as a future to do to make most
 | 
			
		||||
        // rapid progresss on DWF for now.
 | 
			
		||||
        //
 | 
			
		||||
        NumOp.MooeeDag(PhiEven,X);
 | 
			
		||||
        DenOp.MooeeInvDag(X,Y);
 | 
			
		||||
        action = action + norm2(Y);
 | 
			
		||||
 | 
			
		||||
        return action;
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////////////////
 | 
			
		||||
      // dS/du = phi^dag dV (Mdag M)^-1 V^dag  phi
 | 
			
		||||
      //       - phi^dag V (Mdag M)^-1 [ Mdag dM + dMdag M ]  (Mdag M)^-1 V^dag  phi
 | 
			
		||||
      //       + phi^dag V (Mdag M)^-1 dV^dag  phi
 | 
			
		||||
      //////////////////////////////////////////////////////
 | 
			
		||||
      virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
 | 
			
		||||
 | 
			
		||||
        NumOp.ImportGauge(U);
 | 
			
		||||
        DenOp.ImportGauge(U);
 | 
			
		||||
 | 
			
		||||
        SchurDifferentiableOperator<Impl> Mpc(DenOp);
 | 
			
		||||
        SchurDifferentiableOperator<Impl> Vpc(NumOp);
 | 
			
		||||
 | 
			
		||||
        FermionField  X(NumOp.FermionRedBlackGrid());
 | 
			
		||||
        FermionField  Y(NumOp.FermionRedBlackGrid());
 | 
			
		||||
 | 
			
		||||
        // This assignment is necessary to be compliant with the HMC grids
 | 
			
		||||
	GaugeField force(dSdU._grid);
 | 
			
		||||
 | 
			
		||||
        //Y=Vdag phi
 | 
			
		||||
        //X = (Mdag M)^-1 V^dag phi
 | 
			
		||||
        //Y = (Mdag)^-1 V^dag  phi
 | 
			
		||||
        Vpc.MpcDag(PhiOdd,Y);          // Y= Vdag phi
 | 
			
		||||
        X=zero;
 | 
			
		||||
        DerivativeSolver(Mpc,Y,X);     // X= (MdagM)^-1 Vdag phi
 | 
			
		||||
        Mpc.Mpc(X,Y);                  // Y=  Mdag^-1 Vdag phi
 | 
			
		||||
 | 
			
		||||
        // phi^dag V (Mdag M)^-1 dV^dag  phi
 | 
			
		||||
        Vpc.MpcDagDeriv(force , X, PhiOdd );   dSdU = force;
 | 
			
		||||
  
 | 
			
		||||
        // phi^dag dV (Mdag M)^-1 V^dag  phi
 | 
			
		||||
        Vpc.MpcDeriv(force , PhiOdd, X );      dSdU = dSdU+force;
 | 
			
		||||
 | 
			
		||||
        //    -    phi^dag V (Mdag M)^-1 Mdag dM   (Mdag M)^-1 V^dag  phi
 | 
			
		||||
        //    -    phi^dag V (Mdag M)^-1 dMdag M   (Mdag M)^-1 V^dag  phi
 | 
			
		||||
        Mpc.MpcDeriv(force,Y,X);              dSdU = dSdU-force;
 | 
			
		||||
        Mpc.MpcDagDeriv(force,X,Y);           dSdU = dSdU-force;
 | 
			
		||||
 | 
			
		||||
        // FIXME No force contribution from EvenEven assumed here
 | 
			
		||||
        // Needs a fix for clover.
 | 
			
		||||
        assert(NumOp.ConstEE() == 1);
 | 
			
		||||
        assert(DenOp.ConstEE() == 1);
 | 
			
		||||
 | 
			
		||||
        dSdU = -dSdU;
 | 
			
		||||
        
 | 
			
		||||
      };
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,50 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/gauge/Scalar.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_QCD_SCALAR_H
 | 
			
		||||
#define GRID_QCD_SCALAR_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/action/scalar/ScalarImpl.h>
 | 
			
		||||
#include <Grid/qcd/action/scalar/ScalarAction.h>
 | 
			
		||||
#include <Grid/qcd/action/scalar/ScalarInteractionAction.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
  typedef ScalarAction<ScalarImplR>                 ScalarActionR;
 | 
			
		||||
  typedef ScalarAction<ScalarImplF>                 ScalarActionF;
 | 
			
		||||
  typedef ScalarAction<ScalarImplD>                 ScalarActionD;
 | 
			
		||||
 | 
			
		||||
  template <int Colours, int Dimensions> using ScalarAdjActionR = ScalarInteractionAction<ScalarNxNAdjImplR<Colours>, Dimensions>;
 | 
			
		||||
  template <int Colours, int Dimensions> using ScalarAdjActionF = ScalarInteractionAction<ScalarNxNAdjImplF<Colours>, Dimensions>;
 | 
			
		||||
  template <int Colours, int Dimensions> using ScalarAdjActionD = ScalarInteractionAction<ScalarNxNAdjImplD<Colours>, Dimensions>;
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif  // GRID_QCD_SCALAR_H
 | 
			
		||||
@@ -1,83 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
  Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
  Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
  Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
  Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
  Author: neo <cossu@post.kek.jp>
 | 
			
		||||
  Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
  This program is free software; you can redistribute it and/or modify
 | 
			
		||||
  it under the terms of the GNU General Public License as published by
 | 
			
		||||
  the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
  (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
  This program is distributed in the hope that it will be useful,
 | 
			
		||||
  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
  GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
  You should have received a copy of the GNU General Public License along
 | 
			
		||||
  with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
  See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
  *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef SCALAR_ACTION_H
 | 
			
		||||
#define SCALAR_ACTION_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
  // FIXME drop the QCD namespace everywhere here
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class ScalarAction : public QCD::Action<typename Impl::Field> {
 | 
			
		||||
 public:
 | 
			
		||||
    INHERIT_FIELD_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
    RealD mass_square;
 | 
			
		||||
    RealD lambda;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
    ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {}
 | 
			
		||||
 | 
			
		||||
    virtual std::string LogParameters() {
 | 
			
		||||
      std::stringstream sstream;
 | 
			
		||||
      sstream << GridLogMessage << "[ScalarAction] lambda      : " << lambda      << std::endl;
 | 
			
		||||
      sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl;
 | 
			
		||||
      return sstream.str();
 | 
			
		||||
    }
 | 
			
		||||
    virtual std::string action_name() {return "ScalarAction";}
 | 
			
		||||
 | 
			
		||||
    virtual void refresh(const Field &U, GridParallelRNG &pRNG) {}  // noop as no pseudoferms
 | 
			
		||||
 | 
			
		||||
    virtual RealD S(const Field &p) {
 | 
			
		||||
      return (mass_square * 0.5 + QCD::Nd) * ScalarObs<Impl>::sumphisquared(p) +
 | 
			
		||||
    (lambda / 24.) * ScalarObs<Impl>::sumphifourth(p) +
 | 
			
		||||
    ScalarObs<Impl>::sumphider(p);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    virtual void deriv(const Field &p,
 | 
			
		||||
                       Field &force) {
 | 
			
		||||
      Field tmp(p._grid);
 | 
			
		||||
      Field p2(p._grid);
 | 
			
		||||
      ScalarObs<Impl>::phisquared(p2, p);
 | 
			
		||||
      tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1));
 | 
			
		||||
      for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1);
 | 
			
		||||
 | 
			
		||||
      force =+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) * p2 * p + tmp;
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}  // namespace Grid
 | 
			
		||||
 | 
			
		||||
#endif // SCALAR_ACTION_H
 | 
			
		||||
@@ -1,263 +0,0 @@
 | 
			
		||||
#ifndef SCALAR_IMPL
 | 
			
		||||
#define SCALAR_IMPL
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
  //namespace QCD {
 | 
			
		||||
 | 
			
		||||
template <class S>
 | 
			
		||||
class ScalarImplTypes {
 | 
			
		||||
 public:
 | 
			
		||||
    typedef S Simd;
 | 
			
		||||
 | 
			
		||||
    template <typename vtype>
 | 
			
		||||
    using iImplField = iScalar<iScalar<iScalar<vtype> > >;
 | 
			
		||||
 | 
			
		||||
    typedef iImplField<Simd> SiteField;
 | 
			
		||||
    typedef SiteField        SitePropagator;
 | 
			
		||||
    typedef SiteField        SiteComplex;
 | 
			
		||||
 | 
			
		||||
    typedef Lattice<SiteField> Field;
 | 
			
		||||
    typedef Field              ComplexField;
 | 
			
		||||
    typedef Field              FermionField;
 | 
			
		||||
    typedef Field              PropagatorField;
 | 
			
		||||
 | 
			
		||||
    static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){
 | 
			
		||||
      gaussian(pRNG, P);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline Field projectForce(Field& P){return P;}
 | 
			
		||||
 | 
			
		||||
    static inline void update_field(Field& P, Field& U, double ep) {
 | 
			
		||||
      U += P*ep;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline RealD FieldSquareNorm(Field& U) {
 | 
			
		||||
      return (- sum(trace(U*U))/2.0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
      gaussian(pRNG, U);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
      gaussian(pRNG, U);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
      U = 1.0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static void MomentumSpacePropagator(Field &out, RealD m)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase           *grid = out._grid;
 | 
			
		||||
      Field              kmu(grid), one(grid);
 | 
			
		||||
      const unsigned int nd    = grid->_ndimension;
 | 
			
		||||
      std::vector<int>   &l    = grid->_fdimensions;
 | 
			
		||||
 | 
			
		||||
      one = Complex(1.0,0.0);
 | 
			
		||||
      out = m*m;
 | 
			
		||||
      for(int mu = 0; mu < nd; mu++)
 | 
			
		||||
      {
 | 
			
		||||
        Real twoPiL = M_PI*2./l[mu];
 | 
			
		||||
 | 
			
		||||
        LatticeCoordinate(kmu,mu);
 | 
			
		||||
        kmu = 2.*sin(.5*twoPiL*kmu);
 | 
			
		||||
        out = out + kmu*kmu;
 | 
			
		||||
      }
 | 
			
		||||
      out = one/out;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static void FreePropagator(const Field &in, Field &out,
 | 
			
		||||
                               const Field &momKernel)
 | 
			
		||||
    {
 | 
			
		||||
      FFT   fft((GridCartesian *)in._grid);
 | 
			
		||||
      Field inFT(in._grid);
 | 
			
		||||
 | 
			
		||||
      fft.FFT_all_dim(inFT, in, FFT::forward);
 | 
			
		||||
      inFT = inFT*momKernel;
 | 
			
		||||
      fft.FFT_all_dim(out, inFT, FFT::backward);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static void FreePropagator(const Field &in, Field &out, RealD m)
 | 
			
		||||
    {
 | 
			
		||||
      Field momKernel(in._grid);
 | 
			
		||||
 | 
			
		||||
      MomentumSpacePropagator(momKernel, m);
 | 
			
		||||
      FreePropagator(in, out, momKernel);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  #ifdef  USE_FFT_ACCELERATION
 | 
			
		||||
  #ifndef FFT_MASS
 | 
			
		||||
  #error  "USE_FFT_ACCELERATION is defined but not FFT_MASS"
 | 
			
		||||
  #endif
 | 
			
		||||
  #endif
 | 
			
		||||
  
 | 
			
		||||
  template <class S, unsigned int N>
 | 
			
		||||
  class ScalarAdjMatrixImplTypes {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef S Simd;
 | 
			
		||||
    typedef QCD::SU<N> Group;
 | 
			
		||||
 | 
			
		||||
    template <typename vtype>
 | 
			
		||||
    using iImplField   = iScalar<iScalar<iMatrix<vtype, N>>>;
 | 
			
		||||
    template <typename vtype>
 | 
			
		||||
    using iImplComplex = iScalar<iScalar<iScalar<vtype>>>;
 | 
			
		||||
 | 
			
		||||
    typedef iImplField<Simd>   SiteField;
 | 
			
		||||
    typedef SiteField          SitePropagator;
 | 
			
		||||
    typedef iImplComplex<Simd> SiteComplex;
 | 
			
		||||
 | 
			
		||||
    typedef Lattice<SiteField>   Field;
 | 
			
		||||
    typedef Lattice<SiteComplex> ComplexField;
 | 
			
		||||
    typedef Field                FermionField;
 | 
			
		||||
    typedef Field                PropagatorField;
 | 
			
		||||
 | 
			
		||||
    static void MomentaSquare(ComplexField &out)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = out._grid;
 | 
			
		||||
      const std::vector<int> &l = grid->FullDimensions();
 | 
			
		||||
      ComplexField kmu(grid);
 | 
			
		||||
 | 
			
		||||
      for (int mu = 0; mu < grid->Nd(); mu++)
 | 
			
		||||
      {
 | 
			
		||||
        Real twoPiL = M_PI * 2.0 / l[mu];
 | 
			
		||||
        LatticeCoordinate(kmu, mu);
 | 
			
		||||
        kmu = 2.0 * sin(0.5 * twoPiL * kmu);
 | 
			
		||||
        out += kmu * kmu;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static void MomentumSpacePropagator(ComplexField &out, RealD m)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = out._grid;
 | 
			
		||||
      ComplexField one(grid);
 | 
			
		||||
      one = Complex(1.0, 0.0);
 | 
			
		||||
      out = m * m;
 | 
			
		||||
      MomentaSquare(out);
 | 
			
		||||
      out = one / out;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline void generate_momenta(Field &P, GridParallelRNG &pRNG)
 | 
			
		||||
    {
 | 
			
		||||
#ifndef USE_FFT_ACCELERATION
 | 
			
		||||
      Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P);
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
      Field Pgaussian(P._grid), Pp(P._grid);
 | 
			
		||||
      ComplexField p2(P._grid); p2 = zero;
 | 
			
		||||
      RealD M = FFT_MASS;
 | 
			
		||||
      
 | 
			
		||||
      Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian);
 | 
			
		||||
 | 
			
		||||
      FFT theFFT((GridCartesian*)P._grid);
 | 
			
		||||
      theFFT.FFT_all_dim(Pp, Pgaussian, FFT::forward);
 | 
			
		||||
      MomentaSquare(p2);
 | 
			
		||||
      p2 += M * M;
 | 
			
		||||
      p2 = sqrt(p2);
 | 
			
		||||
      Pp *= p2;
 | 
			
		||||
      theFFT.FFT_all_dim(P, Pp, FFT::backward);
 | 
			
		||||
 | 
			
		||||
#endif //USE_FFT_ACCELERATION
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline Field projectForce(Field& P) {return P;}
 | 
			
		||||
 | 
			
		||||
    static inline void update_field(Field &P, Field &U, double ep)
 | 
			
		||||
    {
 | 
			
		||||
#ifndef USE_FFT_ACCELERATION
 | 
			
		||||
      double t0=usecond(); 
 | 
			
		||||
      U += P * ep;
 | 
			
		||||
      double t1=usecond();
 | 
			
		||||
      double total_time = (t1-t0)/1e6;
 | 
			
		||||
      std::cout << GridLogIntegrator << "Total time for updating field (s)       : " << total_time << std::endl; 
 | 
			
		||||
#else
 | 
			
		||||
      // FFT transform P(x) -> P(p)
 | 
			
		||||
      // divide by (M^2+p^2)  M external parameter (how to pass?)
 | 
			
		||||
      // P'(p) = P(p)/(M^2+p^2)
 | 
			
		||||
      // Transform back -> P'(x)
 | 
			
		||||
      // U += P'(x)*ep
 | 
			
		||||
 | 
			
		||||
      Field Pp(U._grid), P_FFT(U._grid);     
 | 
			
		||||
      static ComplexField p2(U._grid);
 | 
			
		||||
      RealD M = FFT_MASS;
 | 
			
		||||
      
 | 
			
		||||
      FFT theFFT((GridCartesian*)U._grid);
 | 
			
		||||
      theFFT.FFT_all_dim(Pp, P, FFT::forward);
 | 
			
		||||
 | 
			
		||||
      static bool first_call = true;
 | 
			
		||||
      if (first_call)
 | 
			
		||||
      {
 | 
			
		||||
        // avoid recomputing
 | 
			
		||||
        MomentumSpacePropagator(p2, M);
 | 
			
		||||
        first_call = false;
 | 
			
		||||
      }
 | 
			
		||||
      Pp *= p2;
 | 
			
		||||
      theFFT.FFT_all_dim(P_FFT, Pp, FFT::backward);
 | 
			
		||||
      U += P_FFT * ep;
 | 
			
		||||
 | 
			
		||||
#endif //USE_FFT_ACCELERATION
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline RealD FieldSquareNorm(Field &U)
 | 
			
		||||
    {
 | 
			
		||||
#ifndef USE_FFT_ACCELERATION
 | 
			
		||||
      return (TensorRemove(sum(trace(U * U))).real());
 | 
			
		||||
#else
 | 
			
		||||
      // In case of Fourier acceleration we have to:
 | 
			
		||||
      // compute U(p)*U(p)/(M^2+p^2))   Parseval theorem
 | 
			
		||||
      // 1 FFT needed U(x) -> U(p)
 | 
			
		||||
      // M to be passed
 | 
			
		||||
 | 
			
		||||
      FFT theFFT((GridCartesian*)U._grid);
 | 
			
		||||
      Field Up(U._grid);
 | 
			
		||||
 | 
			
		||||
      theFFT.FFT_all_dim(Up, U, FFT::forward);
 | 
			
		||||
      RealD M = FFT_MASS;
 | 
			
		||||
      ComplexField p2(U._grid);
 | 
			
		||||
      MomentumSpacePropagator(p2, M);
 | 
			
		||||
      Field Up2 = Up * p2;
 | 
			
		||||
      // from the definition of the DFT we need to divide by the volume
 | 
			
		||||
      return (-TensorRemove(sum(trace(adj(Up) * Up2))).real() / U._grid->gSites());
 | 
			
		||||
#endif //USE_FFT_ACCELERATION
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
      Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
      Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U, 0.01);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
 | 
			
		||||
      U = zero;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  typedef ScalarImplTypes<vReal> ScalarImplR;
 | 
			
		||||
  typedef ScalarImplTypes<vRealF> ScalarImplF;
 | 
			
		||||
  typedef ScalarImplTypes<vRealD> ScalarImplD;
 | 
			
		||||
  typedef ScalarImplTypes<vComplex> ScalarImplCR;
 | 
			
		||||
  typedef ScalarImplTypes<vComplexF> ScalarImplCF;
 | 
			
		||||
  typedef ScalarImplTypes<vComplexD> ScalarImplCD;
 | 
			
		||||
 | 
			
		||||
  // Hardcoding here the size of the matrices
 | 
			
		||||
  typedef ScalarAdjMatrixImplTypes<vComplex,  QCD::Nc> ScalarAdjImplR;
 | 
			
		||||
  typedef ScalarAdjMatrixImplTypes<vComplexF, QCD::Nc> ScalarAdjImplF;
 | 
			
		||||
  typedef ScalarAdjMatrixImplTypes<vComplexD, QCD::Nc> ScalarAdjImplD;
 | 
			
		||||
 | 
			
		||||
  template <int Colours > using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes<vComplex,   Colours >;
 | 
			
		||||
  template <int Colours > using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes<vComplexF,  Colours >;
 | 
			
		||||
  template <int Colours > using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes<vComplexD,  Colours >;
 | 
			
		||||
 | 
			
		||||
  //}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,208 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
  Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
  Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
  Author: Guido Cossu <guido,cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
  This program is free software; you can redistribute it and/or modify
 | 
			
		||||
  it under the terms of the GNU General Public License as published by
 | 
			
		||||
  the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
  (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
  This program is distributed in the hope that it will be useful,
 | 
			
		||||
  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
  GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
  You should have received a copy of the GNU General Public License along
 | 
			
		||||
  with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
  See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
  *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef SCALAR_INT_ACTION_H
 | 
			
		||||
#define SCALAR_INT_ACTION_H
 | 
			
		||||
 | 
			
		||||
// Note: this action can completely absorb the ScalarAction for real float fields
 | 
			
		||||
// use the scalarObjs to generalise the structure
 | 
			
		||||
 | 
			
		||||
namespace Grid
 | 
			
		||||
{
 | 
			
		||||
// FIXME drop the QCD namespace everywhere here
 | 
			
		||||
 | 
			
		||||
template <class Impl, int Ndim>
 | 
			
		||||
class ScalarInteractionAction : public QCD::Action<typename Impl::Field>
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  INHERIT_FIELD_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  RealD mass_square;
 | 
			
		||||
  RealD lambda;
 | 
			
		||||
  RealD g;
 | 
			
		||||
  const unsigned int N = Impl::Group::Dimension;
 | 
			
		||||
 | 
			
		||||
  typedef typename Field::vector_object vobj;
 | 
			
		||||
  typedef CartesianStencil<vobj, vobj> Stencil;
 | 
			
		||||
 | 
			
		||||
  SimpleCompressor<vobj> compressor;
 | 
			
		||||
  int npoint = 2 * Ndim;
 | 
			
		||||
  std::vector<int> directions;    //
 | 
			
		||||
  std::vector<int> displacements; //
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  ScalarInteractionAction(RealD ms, RealD l, RealD gval) : mass_square(ms), lambda(l), g(gval), displacements(2 * Ndim, 0), directions(2 * Ndim, 0)
 | 
			
		||||
  {
 | 
			
		||||
    for (int mu = 0; mu < Ndim; mu++)
 | 
			
		||||
    {
 | 
			
		||||
      directions[mu] = mu;
 | 
			
		||||
      directions[mu + Ndim] = mu;
 | 
			
		||||
      displacements[mu] = 1;
 | 
			
		||||
      displacements[mu + Ndim] = -1;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual std::string LogParameters()
 | 
			
		||||
  {
 | 
			
		||||
    std::stringstream sstream;
 | 
			
		||||
    sstream << GridLogMessage << "[ScalarAction] lambda      : " << lambda << std::endl;
 | 
			
		||||
    sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl;
 | 
			
		||||
    sstream << GridLogMessage << "[ScalarAction] g           : " << g << std::endl;
 | 
			
		||||
    return sstream.str();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual std::string action_name() { return "ScalarAction"; }
 | 
			
		||||
 | 
			
		||||
  virtual void refresh(const Field &U, GridParallelRNG &pRNG) {}
 | 
			
		||||
 | 
			
		||||
  virtual RealD S(const Field &p)
 | 
			
		||||
  {
 | 
			
		||||
    assert(p._grid->Nd() == Ndim);
 | 
			
		||||
    static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
 | 
			
		||||
    phiStencil.HaloExchange(p, compressor);
 | 
			
		||||
    Field action(p._grid), pshift(p._grid), phisquared(p._grid);
 | 
			
		||||
    phisquared = p * p;
 | 
			
		||||
    action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared;
 | 
			
		||||
    for (int mu = 0; mu < Ndim; mu++)
 | 
			
		||||
    {
 | 
			
		||||
      //  pshift = Cshift(p, mu, +1);  // not efficient, implement with stencils
 | 
			
		||||
      parallel_for(int i = 0; i < p._grid->oSites(); i++)
 | 
			
		||||
      {
 | 
			
		||||
        int permute_type;
 | 
			
		||||
        StencilEntry *SE;
 | 
			
		||||
        vobj temp2;
 | 
			
		||||
        const vobj *temp, *t_p;
 | 
			
		||||
 | 
			
		||||
        SE = phiStencil.GetEntry(permute_type, mu, i);
 | 
			
		||||
        t_p = &p._odata[i];
 | 
			
		||||
        if (SE->_is_local)
 | 
			
		||||
        {
 | 
			
		||||
          temp = &p._odata[SE->_offset];
 | 
			
		||||
          if (SE->_permute)
 | 
			
		||||
          {
 | 
			
		||||
            permute(temp2, *temp, permute_type);
 | 
			
		||||
            action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2;
 | 
			
		||||
          }
 | 
			
		||||
          else
 | 
			
		||||
          {
 | 
			
		||||
            action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        else
 | 
			
		||||
        {
 | 
			
		||||
          action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      //  action -= pshift*p + p*pshift;
 | 
			
		||||
    }
 | 
			
		||||
    // NB the trace in the algebra is normalised to 1/2
 | 
			
		||||
    // minus sign coming from the antihermitian fields
 | 
			
		||||
    return -(TensorRemove(sum(trace(action)))).real() * N / g;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  virtual void deriv(const Field &p, Field &force)
 | 
			
		||||
  {
 | 
			
		||||
    double t0 = usecond();
 | 
			
		||||
    assert(p._grid->Nd() == Ndim);
 | 
			
		||||
    force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p;
 | 
			
		||||
    double interm_t = usecond();
 | 
			
		||||
 | 
			
		||||
    // move this outside
 | 
			
		||||
    static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
 | 
			
		||||
 | 
			
		||||
    phiStencil.HaloExchange(p, compressor);
 | 
			
		||||
    double halo_t = usecond();
 | 
			
		||||
    int chunk = 128;
 | 
			
		||||
    //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1);
 | 
			
		||||
 | 
			
		||||
    // inverting the order of the loops slows down the code(! g++ 7)
 | 
			
		||||
    // cannot try to reduce the number of  force writes by factor npoint...
 | 
			
		||||
    // use cache blocking
 | 
			
		||||
    for (int point = 0; point < npoint; point++)
 | 
			
		||||
    {
 | 
			
		||||
 | 
			
		||||
#pragma omp parallel 
 | 
			
		||||
{
 | 
			
		||||
        int permute_type;
 | 
			
		||||
        StencilEntry *SE;
 | 
			
		||||
        const vobj *temp;
 | 
			
		||||
 | 
			
		||||
#pragma omp for schedule(static, chunk)
 | 
			
		||||
      for (int i = 0; i < p._grid->oSites(); i++)
 | 
			
		||||
      {
 | 
			
		||||
        SE = phiStencil.GetEntry(permute_type, point, i);
 | 
			
		||||
        // prefetch next p?
 | 
			
		||||
 | 
			
		||||
        if (SE->_is_local)
 | 
			
		||||
        {
 | 
			
		||||
          temp = &p._odata[SE->_offset];
 | 
			
		||||
      
 | 
			
		||||
          if (SE->_permute)
 | 
			
		||||
          {
 | 
			
		||||
            vobj temp2;
 | 
			
		||||
            permute(temp2, *temp, permute_type);
 | 
			
		||||
            force._odata[i] -= temp2;
 | 
			
		||||
          }
 | 
			
		||||
          else
 | 
			
		||||
          {
 | 
			
		||||
            force._odata[i] -= *temp; // slow part. Dominated by this read/write (BW)
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        else
 | 
			
		||||
        {
 | 
			
		||||
          force._odata[i] -= phiStencil.CommBuf()[SE->_offset];
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  force *= N / g;
 | 
			
		||||
 | 
			
		||||
  double t1 = usecond();
 | 
			
		||||
  double total_time = (t1 - t0) / 1e6;
 | 
			
		||||
  double interm_time = (interm_t - t0) / 1e6;
 | 
			
		||||
  double halo_time = (halo_t - interm_t) / 1e6;
 | 
			
		||||
  double stencil_time = (t1 - halo_t) / 1e6;
 | 
			
		||||
  std::cout << GridLogIntegrator << "Total time for force computation (s)       : " << total_time << std::endl;
 | 
			
		||||
  std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl;
 | 
			
		||||
  std::cout << GridLogIntegrator << "Halo time in force computation (s)         : " << halo_time << std::endl;
 | 
			
		||||
  std::cout << GridLogIntegrator << "Stencil time in force computation (s)      : " << stencil_time << std::endl;
 | 
			
		||||
  double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2);
 | 
			
		||||
  double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2);
 | 
			
		||||
  double Gflops = flops / (total_time * 1e9);
 | 
			
		||||
  double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
 | 
			
		||||
  std::cout << GridLogIntegrator << "Flops: " << flops << "  - Gflop/s : " << Gflops << std::endl;
 | 
			
		||||
  std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << "  - Gflop/s NS: " << Gflops_no_stencil << std::endl;
 | 
			
		||||
}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace Grid
 | 
			
		||||
 | 
			
		||||
#endif // SCALAR_INT_ACTION_H
 | 
			
		||||
@@ -1,219 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/GenericHmcRunner.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
  See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
  directory
 | 
			
		||||
  *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_GENERIC_HMC_RUNNER
 | 
			
		||||
#define GRID_GENERIC_HMC_RUNNER
 | 
			
		||||
 | 
			
		||||
#include <unordered_map>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// very ugly here but possibly resolved if we had a base Reader class
 | 
			
		||||
template < class ReaderClass >
 | 
			
		||||
class HMCRunnerBase {
 | 
			
		||||
public:
 | 
			
		||||
  virtual void Run() = 0;
 | 
			
		||||
  virtual void initialize(ReaderClass& ) = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Implementation,
 | 
			
		||||
          template <typename, typename, typename> class Integrator,
 | 
			
		||||
          class RepresentationsPolicy = NoHirep, class ReaderClass = XmlReader>
 | 
			
		||||
class HMCWrapperTemplate: public HMCRunnerBase<ReaderClass> {
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_FIELD_TYPES(Implementation);
 | 
			
		||||
  typedef Implementation ImplPolicy;  // visible from outside
 | 
			
		||||
  template <typename S = NoSmearing<Implementation> >
 | 
			
		||||
  using IntegratorType = Integrator<Implementation, S, RepresentationsPolicy>;
 | 
			
		||||
 | 
			
		||||
  HMCparameters Parameters;
 | 
			
		||||
  std::string ParameterFile;
 | 
			
		||||
  HMCResourceManager<Implementation> Resources;
 | 
			
		||||
 | 
			
		||||
  // The set of actions (keep here for lower level users, for now)
 | 
			
		||||
  ActionSet<Field, RepresentationsPolicy> TheAction;
 | 
			
		||||
 | 
			
		||||
  HMCWrapperTemplate() = default;
 | 
			
		||||
 | 
			
		||||
  HMCWrapperTemplate(HMCparameters Par){
 | 
			
		||||
    Parameters = Par;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void initialize(ReaderClass & TheReader){
 | 
			
		||||
    std::cout  << "Initialization of the HMC" << std::endl;
 | 
			
		||||
    Resources.initialize(TheReader);
 | 
			
		||||
 | 
			
		||||
    // eventually add smearing
 | 
			
		||||
 | 
			
		||||
    Resources.GetActionSet(TheAction);    
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  void ReadCommandLine(int argc, char **argv) {
 | 
			
		||||
    std::string arg;
 | 
			
		||||
 | 
			
		||||
    if (GridCmdOptionExists(argv, argv + argc, "--StartingType")) {
 | 
			
		||||
      arg = GridCmdOptionPayload(argv, argv + argc, "--StartingType");
 | 
			
		||||
 | 
			
		||||
      if (arg != "HotStart" && arg != "ColdStart" && arg != "TepidStart" &&
 | 
			
		||||
          arg != "CheckpointStart") {
 | 
			
		||||
        std::cout << GridLogError << "Unrecognized option in --StartingType\n";
 | 
			
		||||
        std::cout
 | 
			
		||||
            << GridLogError
 | 
			
		||||
            << "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n";
 | 
			
		||||
        exit(1);
 | 
			
		||||
      }
 | 
			
		||||
      Parameters.StartingType = arg;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (GridCmdOptionExists(argv, argv + argc, "--StartingTrajectory")) {
 | 
			
		||||
      arg = GridCmdOptionPayload(argv, argv + argc, "--StartingTrajectory");
 | 
			
		||||
      std::vector<int> ivec(0);
 | 
			
		||||
      GridCmdOptionIntVector(arg, ivec);
 | 
			
		||||
      Parameters.StartTrajectory = ivec[0];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (GridCmdOptionExists(argv, argv + argc, "--Trajectories")) {
 | 
			
		||||
      arg = GridCmdOptionPayload(argv, argv + argc, "--Trajectories");
 | 
			
		||||
      std::vector<int> ivec(0);
 | 
			
		||||
      GridCmdOptionIntVector(arg, ivec);
 | 
			
		||||
      Parameters.Trajectories = ivec[0];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (GridCmdOptionExists(argv, argv + argc, "--Thermalizations")) {
 | 
			
		||||
      arg = GridCmdOptionPayload(argv, argv + argc, "--Thermalizations");
 | 
			
		||||
      std::vector<int> ivec(0);
 | 
			
		||||
      GridCmdOptionIntVector(arg, ivec);
 | 
			
		||||
      Parameters.NoMetropolisUntil = ivec[0];
 | 
			
		||||
    }
 | 
			
		||||
    if (GridCmdOptionExists(argv, argv + argc, "--ParameterFile")) {
 | 
			
		||||
      arg = GridCmdOptionPayload(argv, argv + argc, "--ParameterFile");
 | 
			
		||||
      ParameterFile = arg;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  template <class SmearingPolicy>
 | 
			
		||||
  void Run(SmearingPolicy &S) {
 | 
			
		||||
    Runner(S);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void Run(){
 | 
			
		||||
    NoSmearing<Implementation> S;
 | 
			
		||||
    Runner(S);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  template <class SmearingPolicy>
 | 
			
		||||
  void Runner(SmearingPolicy &Smearing) {
 | 
			
		||||
    auto UGrid = Resources.GetCartesian();
 | 
			
		||||
    Resources.AddRNGs();
 | 
			
		||||
    Field U(UGrid);
 | 
			
		||||
 | 
			
		||||
    // Can move this outside?
 | 
			
		||||
    typedef IntegratorType<SmearingPolicy> TheIntegrator;
 | 
			
		||||
    TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
 | 
			
		||||
 | 
			
		||||
    if (Parameters.StartingType == "HotStart") {
 | 
			
		||||
      // Hot start
 | 
			
		||||
      Resources.SeedFixedIntegers();
 | 
			
		||||
      Implementation::HotConfiguration(Resources.GetParallelRNG(), U);
 | 
			
		||||
    } else if (Parameters.StartingType == "ColdStart") {
 | 
			
		||||
      // Cold start
 | 
			
		||||
      Resources.SeedFixedIntegers();
 | 
			
		||||
      Implementation::ColdConfiguration(Resources.GetParallelRNG(), U);
 | 
			
		||||
    } else if (Parameters.StartingType == "TepidStart") {
 | 
			
		||||
      // Tepid start
 | 
			
		||||
      Resources.SeedFixedIntegers();
 | 
			
		||||
      Implementation::TepidConfiguration(Resources.GetParallelRNG(), U);
 | 
			
		||||
    } else if (Parameters.StartingType == "CheckpointStart") {
 | 
			
		||||
      // CheckpointRestart
 | 
			
		||||
      Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U,
 | 
			
		||||
                                   Resources.GetSerialRNG(),
 | 
			
		||||
                                   Resources.GetParallelRNG());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Smearing.set_Field(U);
 | 
			
		||||
 | 
			
		||||
    HybridMonteCarlo<TheIntegrator> HMC(Parameters, MDynamics,
 | 
			
		||||
                                        Resources.GetSerialRNG(),
 | 
			
		||||
                                        Resources.GetParallelRNG(), 
 | 
			
		||||
                                        Resources.GetObservables(), U);
 | 
			
		||||
 | 
			
		||||
    // Run it
 | 
			
		||||
    HMC.evolve();
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// These are for gauge fields, default integrator MinimumNorm2
 | 
			
		||||
template <template <typename, typename, typename> class Integrator>
 | 
			
		||||
using GenericHMCRunner = HMCWrapperTemplate<PeriodicGimplR, Integrator>;
 | 
			
		||||
template <template <typename, typename, typename> class Integrator>
 | 
			
		||||
using GenericHMCRunnerF = HMCWrapperTemplate<PeriodicGimplF, Integrator>;
 | 
			
		||||
template <template <typename, typename, typename> class Integrator>
 | 
			
		||||
using GenericHMCRunnerD = HMCWrapperTemplate<PeriodicGimplD, Integrator>;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// These are for gauge fields, default integrator MinimumNorm2
 | 
			
		||||
template <template <typename, typename, typename> class Integrator>
 | 
			
		||||
using ConjugateHMCRunner = HMCWrapperTemplate<ConjugateGimplR, Integrator>;
 | 
			
		||||
template <template <typename, typename, typename> class Integrator>
 | 
			
		||||
using ConjugateHMCRunnerF = HMCWrapperTemplate<ConjugateGimplF, Integrator>;
 | 
			
		||||
template <template <typename, typename, typename> class Integrator>
 | 
			
		||||
using ConjugateHMCRunnerD = HMCWrapperTemplate<ConjugateGimplD, Integrator>;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class RepresentationsPolicy,
 | 
			
		||||
          template <typename, typename, typename> class Integrator>
 | 
			
		||||
using GenericHMCRunnerHirep =
 | 
			
		||||
    HMCWrapperTemplate<PeriodicGimplR, Integrator, RepresentationsPolicy>;
 | 
			
		||||
 | 
			
		||||
template <class Implementation, class RepresentationsPolicy, 
 | 
			
		||||
          template <typename, typename, typename> class Integrator>
 | 
			
		||||
using GenericHMCRunnerTemplate = HMCWrapperTemplate<Implementation, Integrator, RepresentationsPolicy>;
 | 
			
		||||
 | 
			
		||||
typedef HMCWrapperTemplate<ScalarImplR, MinimumNorm2, ScalarFields>
 | 
			
		||||
    ScalarGenericHMCRunner;
 | 
			
		||||
 | 
			
		||||
typedef HMCWrapperTemplate<ScalarAdjImplR, MinimumNorm2, ScalarMatrixFields>
 | 
			
		||||
    ScalarAdjGenericHMCRunner;
 | 
			
		||||
 | 
			
		||||
template <int Colours> 
 | 
			
		||||
using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR<Colours>, ForceGradient, ScalarNxNMatrixFields<Colours> >;
 | 
			
		||||
 | 
			
		||||
}  // namespace QCD
 | 
			
		||||
}  // namespace Grid
 | 
			
		||||
 | 
			
		||||
#endif  // GRID_GENERIC_HMC_RUNNER
 | 
			
		||||
@@ -1,111 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/GenericHmcRunner.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_HMC_MODULES
 | 
			
		||||
#define GRID_HMC_MODULES
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include "HMC_GridModules.h"
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////
 | 
			
		||||
struct RNGModuleParameters: Serializable {
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(RNGModuleParameters,
 | 
			
		||||
  std::string, serial_seeds,
 | 
			
		||||
  std::string, parallel_seeds,);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> getSerialSeeds(){return strToVec<int>(serial_seeds);}
 | 
			
		||||
  std::vector<int> getParallelSeeds(){return strToVec<int>(parallel_seeds);}
 | 
			
		||||
 | 
			
		||||
  RNGModuleParameters(): serial_seeds("1"), parallel_seeds("1"){}
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass >
 | 
			
		||||
  RNGModuleParameters(Reader<ReaderClass>& Reader){
 | 
			
		||||
    read(Reader, "RandomNumberGenerator", *this); 
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Random number generators module
 | 
			
		||||
class RNGModule{
 | 
			
		||||
   GridSerialRNG sRNG_;
 | 
			
		||||
   std::unique_ptr<GridParallelRNG> pRNG_;
 | 
			
		||||
   RNGModuleParameters Params_;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  RNGModule(){};
 | 
			
		||||
 | 
			
		||||
  void set_pRNG(GridParallelRNG* pRNG){
 | 
			
		||||
    pRNG_.reset(pRNG);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void set_RNGSeeds(RNGModuleParameters& Params) {
 | 
			
		||||
    Params_ = Params;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  GridSerialRNG& get_sRNG() { return sRNG_; }
 | 
			
		||||
  GridParallelRNG& get_pRNG() { return *pRNG_.get(); }
 | 
			
		||||
 | 
			
		||||
  void seed() {
 | 
			
		||||
    auto SerialSeeds   = Params_.getSerialSeeds();
 | 
			
		||||
    auto ParallelSeeds = Params_.getParallelSeeds();
 | 
			
		||||
    if (SerialSeeds.size() == 0 && ParallelSeeds.size() == 0) {
 | 
			
		||||
      std::cout << GridLogError << "Seeds not initialized" << std::endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
    sRNG_.SeedFixedIntegers(SerialSeeds);
 | 
			
		||||
    pRNG_->SeedFixedIntegers(ParallelSeeds);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
///////////////////////////////////////////////////////////////////
 | 
			
		||||
/// Smearing module
 | 
			
		||||
template <class ImplementationPolicy>
 | 
			
		||||
class SmearingModule{
 | 
			
		||||
   virtual void get_smearing();
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class ImplementationPolicy>
 | 
			
		||||
class StoutSmearingModule: public SmearingModule<ImplementationPolicy>{
 | 
			
		||||
   SmearedConfiguration<ImplementationPolicy> SmearingPolicy;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}  // namespace QCD
 | 
			
		||||
}  // namespace Grid
 | 
			
		||||
 | 
			
		||||
#endif  // GRID_HMC_MODULES
 | 
			
		||||
@@ -1,328 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/GenericHmcRunner.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
  See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
  directory
 | 
			
		||||
  *************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef HMC_RESOURCE_MANAGER_H
 | 
			
		||||
#define HMC_RESOURCE_MANAGER_H
 | 
			
		||||
 | 
			
		||||
#include <unordered_map>
 | 
			
		||||
 | 
			
		||||
// One function per Checkpointer, use a macro to simplify
 | 
			
		||||
#define RegisterLoadCheckPointerFunction(NAME)                           \
 | 
			
		||||
  void Load##NAME##Checkpointer(const CheckpointerParameters& Params_) { \
 | 
			
		||||
    if (!have_CheckPointer) {                                            \
 | 
			
		||||
      std::cout << GridLogDebug << "Loading Checkpointer " << #NAME      \
 | 
			
		||||
                << std::endl;                                            \
 | 
			
		||||
      CP = std::unique_ptr<CheckpointerBaseModule>(                      \
 | 
			
		||||
        new NAME##CPModule<ImplementationPolicy>(Params_));              \
 | 
			
		||||
      have_CheckPointer = true;                                          \
 | 
			
		||||
    } else {                                                             \
 | 
			
		||||
      std::cout << GridLogError << "Checkpointer already loaded "        \
 | 
			
		||||
                << std::endl;                                            \
 | 
			
		||||
      exit(1);                                                           \
 | 
			
		||||
    }                                                                    \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#define RegisterLoadCheckPointerMetadataFunction(NAME)                   \
 | 
			
		||||
  template < class Metadata >                                            \
 | 
			
		||||
  void Load##NAME##Checkpointer(const CheckpointerParameters& Params_, const Metadata& M_) { \
 | 
			
		||||
    if (!have_CheckPointer) {                                            \
 | 
			
		||||
      std::cout << GridLogDebug << "Loading Metadata Checkpointer " << #NAME      \
 | 
			
		||||
                << std::endl;                                            \
 | 
			
		||||
      CP = std::unique_ptr<CheckpointerBaseModule>(                      \
 | 
			
		||||
        new NAME##CPModule<ImplementationPolicy, Metadata >(Params_, M_));   \
 | 
			
		||||
      have_CheckPointer = true;                                          \
 | 
			
		||||
    } else {                                                             \
 | 
			
		||||
      std::cout << GridLogError << "Checkpointer already loaded "        \
 | 
			
		||||
                << std::endl;                                            \
 | 
			
		||||
      exit(1);                                                           \
 | 
			
		||||
    }                                                                    \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
// HMC Resource manager
 | 
			
		||||
template <class ImplementationPolicy>
 | 
			
		||||
class HMCResourceManager {
 | 
			
		||||
  typedef HMCModuleBase< QCD::BaseHmcCheckpointer<ImplementationPolicy> > CheckpointerBaseModule;
 | 
			
		||||
  typedef HMCModuleBase< QCD::HmcObservable<typename ImplementationPolicy::Field> > ObservableBaseModule;
 | 
			
		||||
  typedef ActionModuleBase< QCD::Action<typename ImplementationPolicy::Field>, GridModule > ActionBaseModule;
 | 
			
		||||
 | 
			
		||||
  // Named storage for grid pairs (std + red-black)
 | 
			
		||||
  std::unordered_map<std::string, GridModule> Grids;
 | 
			
		||||
  RNGModule RNGs;
 | 
			
		||||
 | 
			
		||||
  // SmearingModule<ImplementationPolicy> Smearing;
 | 
			
		||||
  std::unique_ptr<CheckpointerBaseModule> CP;
 | 
			
		||||
 | 
			
		||||
  // A vector of HmcObservable modules
 | 
			
		||||
  std::vector<std::unique_ptr<ObservableBaseModule> > ObservablesList;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // A vector of HmcObservable modules
 | 
			
		||||
  std::multimap<int, std::unique_ptr<ActionBaseModule> > ActionsList;
 | 
			
		||||
  std::vector<int> multipliers;
 | 
			
		||||
 | 
			
		||||
  bool have_RNG;
 | 
			
		||||
  bool have_CheckPointer;
 | 
			
		||||
 | 
			
		||||
  // NOTE: operator << is not overloaded for std::vector<string> 
 | 
			
		||||
  // so this function is necessary
 | 
			
		||||
  void output_vector_string(const std::vector<std::string> &vs){
 | 
			
		||||
    for (auto &i: vs)
 | 
			
		||||
      std::cout << i << " ";
 | 
			
		||||
    std::cout << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  HMCResourceManager() : have_RNG(false), have_CheckPointer(false) {}
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass, class vector_type = vComplex >
 | 
			
		||||
  void initialize(ReaderClass &Read){
 | 
			
		||||
    // assumes we are starting from the main node
 | 
			
		||||
 | 
			
		||||
    // Geometry
 | 
			
		||||
    GridModuleParameters GridPar(Read);
 | 
			
		||||
    GridFourDimModule<vector_type> GridMod( GridPar) ;
 | 
			
		||||
    AddGrid("gauge", GridMod);
 | 
			
		||||
 | 
			
		||||
    // Checkpointer
 | 
			
		||||
    auto &CPfactory = HMC_CPModuleFactory<cp_string, ImplementationPolicy, ReaderClass >::getInstance();
 | 
			
		||||
    Read.push("Checkpointer");
 | 
			
		||||
    std::string cp_type;
 | 
			
		||||
    read(Read,"name", cp_type);
 | 
			
		||||
    std::cout << "Registered types " << std::endl;
 | 
			
		||||
    output_vector_string(CPfactory.getBuilderList());
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    CP = CPfactory.create(cp_type, Read);
 | 
			
		||||
    CP->print_parameters();
 | 
			
		||||
    Read.pop();    
 | 
			
		||||
    have_CheckPointer = true;  
 | 
			
		||||
 | 
			
		||||
    RNGModuleParameters RNGpar(Read);
 | 
			
		||||
    SetRNGSeeds(RNGpar);
 | 
			
		||||
 | 
			
		||||
    // Observables
 | 
			
		||||
    auto &ObsFactory = HMC_ObservablesModuleFactory<observable_string, typename ImplementationPolicy::Field, ReaderClass>::getInstance(); 
 | 
			
		||||
    Read.push(observable_string);// here must check if existing...
 | 
			
		||||
    do {
 | 
			
		||||
      std::string obs_type;
 | 
			
		||||
      read(Read,"name", obs_type);
 | 
			
		||||
      std::cout << "Registered types " << std::endl;
 | 
			
		||||
      output_vector_string(ObsFactory.getBuilderList() );
 | 
			
		||||
 | 
			
		||||
      ObservablesList.emplace_back(ObsFactory.create(obs_type, Read));
 | 
			
		||||
      ObservablesList[ObservablesList.size() - 1]->print_parameters();
 | 
			
		||||
    } while (Read.nextElement(observable_string));
 | 
			
		||||
    Read.pop();
 | 
			
		||||
 | 
			
		||||
    // Loop on levels
 | 
			
		||||
    if(!Read.push("Actions")){
 | 
			
		||||
      std::cout << "Actions not found" << std::endl; 
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if(!Read.push("Level")){// push must check if the node exist
 | 
			
		||||
         std::cout << "Level not found" << std::endl; 
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
    do
 | 
			
		||||
    {
 | 
			
		||||
      fill_ActionsLevel(Read); 
 | 
			
		||||
    }
 | 
			
		||||
    while(Read.push("Level"));
 | 
			
		||||
 | 
			
		||||
    Read.pop();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
  template <class RepresentationPolicy>
 | 
			
		||||
  void GetActionSet(ActionSet<typename ImplementationPolicy::Field, RepresentationPolicy>& Aset){
 | 
			
		||||
    Aset.resize(multipliers.size());
 | 
			
		||||
 
 | 
			
		||||
    for(auto it = ActionsList.begin(); it != ActionsList.end(); it++){
 | 
			
		||||
      (*it).second->acquireResource(Grids["gauge"]);
 | 
			
		||||
      Aset[(*it).first-1].push_back((*it).second->getPtr());
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////////////
 | 
			
		||||
  // Grids
 | 
			
		||||
  //////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  void AddGrid(const std::string s, GridModule& M) {
 | 
			
		||||
    // Check for name clashes
 | 
			
		||||
    auto search = Grids.find(s);
 | 
			
		||||
    if (search != Grids.end()) {
 | 
			
		||||
      std::cout << GridLogError << "Grid with name \"" << search->first
 | 
			
		||||
                << "\" already present. Terminating\n";
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
    Grids[s] = std::move(M);
 | 
			
		||||
    std::cout << GridLogMessage << "::::::::::::::::::::::::::::::::::::::::" <<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "HMCResourceManager:" << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage << "Created grid set with name '" << s << "' and decomposition for the full cartesian " << std::endl;
 | 
			
		||||
    Grids[s].show_full_decomposition();
 | 
			
		||||
    std::cout << GridLogMessage << "::::::::::::::::::::::::::::::::::::::::" <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Add a named grid set, 4d shortcut
 | 
			
		||||
  void AddFourDimGrid(const std::string s) {
 | 
			
		||||
    GridFourDimModule<vComplex> Mod;
 | 
			
		||||
    AddGrid(s, Mod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Add a named grid set, 4d shortcut + tweak simd lanes
 | 
			
		||||
  void AddFourDimGrid(const std::string s, const std::vector<int> simd_decomposition) {
 | 
			
		||||
    GridFourDimModule<vComplex> Mod(simd_decomposition);
 | 
			
		||||
    AddGrid(s, Mod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  GridCartesian* GetCartesian(std::string s = "") {
 | 
			
		||||
    if (s.empty()) s = Grids.begin()->first;
 | 
			
		||||
    std::cout << GridLogDebug << "Getting cartesian grid from: " << s
 | 
			
		||||
              << std::endl;
 | 
			
		||||
    return Grids[s].get_full();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  GridRedBlackCartesian* GetRBCartesian(std::string s = "") {
 | 
			
		||||
    if (s.empty()) s = Grids.begin()->first;
 | 
			
		||||
    std::cout << GridLogDebug << "Getting rb-cartesian grid from: " << s
 | 
			
		||||
              << std::endl;
 | 
			
		||||
    return Grids[s].get_rb();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////
 | 
			
		||||
  // Random number generators
 | 
			
		||||
  //////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  void AddRNGs(std::string s = "") {
 | 
			
		||||
    // Couple the RNGs to the GridModule tagged by s
 | 
			
		||||
    // the default is the first grid registered
 | 
			
		||||
    assert(Grids.size() > 0 && !have_RNG);
 | 
			
		||||
    if (s.empty()) s = Grids.begin()->first;
 | 
			
		||||
    std::cout << GridLogDebug << "Adding RNG to grid: " << s << std::endl;
 | 
			
		||||
    RNGs.set_pRNG(new GridParallelRNG(GetCartesian(s)));
 | 
			
		||||
    have_RNG = true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void SetRNGSeeds(RNGModuleParameters& Params) { RNGs.set_RNGSeeds(Params); }
 | 
			
		||||
 | 
			
		||||
  GridSerialRNG& GetSerialRNG() { return RNGs.get_sRNG(); }
 | 
			
		||||
 | 
			
		||||
  GridParallelRNG& GetParallelRNG() {
 | 
			
		||||
    assert(have_RNG);
 | 
			
		||||
    return RNGs.get_pRNG();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void SeedFixedIntegers() {
 | 
			
		||||
    assert(have_RNG);
 | 
			
		||||
    RNGs.seed();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////////
 | 
			
		||||
  // Checkpointers
 | 
			
		||||
  //////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  BaseHmcCheckpointer<ImplementationPolicy>* GetCheckPointer() {
 | 
			
		||||
    if (have_CheckPointer)
 | 
			
		||||
      return CP->getPtr();
 | 
			
		||||
    else {
 | 
			
		||||
      std::cout << GridLogError << "Error: no checkpointer defined"
 | 
			
		||||
                << std::endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  RegisterLoadCheckPointerFunction(Binary);
 | 
			
		||||
  RegisterLoadCheckPointerFunction(Nersc);
 | 
			
		||||
  #ifdef HAVE_LIME
 | 
			
		||||
  RegisterLoadCheckPointerFunction(ILDG);
 | 
			
		||||
  RegisterLoadCheckPointerMetadataFunction(Scidac);
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
  // Observables
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  template<class T, class... Types>
 | 
			
		||||
  void AddObservable(Types&&... Args){
 | 
			
		||||
    ObservablesList.push_back(std::unique_ptr<T>(new T(std::forward<Types>(Args)...)));
 | 
			
		||||
    ObservablesList.back()->print_parameters();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::vector<HmcObservable<typename ImplementationPolicy::Field>* > GetObservables(){
 | 
			
		||||
    std::vector<HmcObservable<typename ImplementationPolicy::Field>* > out;
 | 
			
		||||
    for (auto &i : ObservablesList){
 | 
			
		||||
      out.push_back(i->getPtr());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Add the checkpointer to the observables
 | 
			
		||||
    out.push_back(GetCheckPointer());
 | 
			
		||||
    return out;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
   // this private
 | 
			
		||||
  template <class ReaderClass >
 | 
			
		||||
  void fill_ActionsLevel(ReaderClass &Read){
 | 
			
		||||
    // Actions set
 | 
			
		||||
    int m;
 | 
			
		||||
    Read.readDefault("multiplier",m);
 | 
			
		||||
    multipliers.push_back(m);
 | 
			
		||||
    std::cout << "Level : " << multipliers.size()  << " with multiplier : " << m << std::endl; 
 | 
			
		||||
    // here gauge
 | 
			
		||||
    Read.push("Action");
 | 
			
		||||
    do{
 | 
			
		||||
      auto &ActionFactory = HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, ReaderClass>::getInstance(); 
 | 
			
		||||
      std::string action_type;
 | 
			
		||||
      Read.readDefault("name", action_type); 
 | 
			
		||||
      output_vector_string(ActionFactory.getBuilderList() );
 | 
			
		||||
      ActionsList.emplace(m, ActionFactory.create(action_type, Read));
 | 
			
		||||
    } while (Read.nextElement("Action"));
 | 
			
		||||
    ActionsList.find(m)->second->print_parameters();    
 | 
			
		||||
    Read.pop();
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif  // HMC_RESOURCE_MANAGER_H
 | 
			
		||||
@@ -1,137 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/GenericHmcRunner.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef HMC_RUNNER_MODULE
 | 
			
		||||
#define HMC_RUNNER_MODULE
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
// the reader class is necessary here for the automatic initialization of the resources
 | 
			
		||||
// if we had a virtual reader would have been unecessary
 | 
			
		||||
template <class HMCType, class ReaderClass >
 | 
			
		||||
class HMCModule
 | 
			
		||||
    : public Parametrized< QCD::HMCparameters >,
 | 
			
		||||
      public HMCModuleBase< QCD::HMCRunnerBase<ReaderClass> > {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef HMCModuleBase< QCD::HMCRunnerBase<ReaderClass> > Base;
 | 
			
		||||
  typedef typename Base::Product Product;
 | 
			
		||||
 | 
			
		||||
  std::unique_ptr<HMCType> HMCPtr;
 | 
			
		||||
 | 
			
		||||
  HMCModule(QCD::HMCparameters Par) : Parametrized<QCD::HMCparameters>(Par) {}
 | 
			
		||||
 | 
			
		||||
  template <class ReaderCl>
 | 
			
		||||
  HMCModule(Reader<ReaderCl>& R) : Parametrized<QCD::HMCparameters>(R, "HMC"){};
 | 
			
		||||
 | 
			
		||||
  Product* getPtr() {
 | 
			
		||||
    if (!HMCPtr) initialize();
 | 
			
		||||
 
 | 
			
		||||
    return HMCPtr.get();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  virtual void initialize() = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Factory
 | 
			
		||||
template <char const *str, class ReaderClass >
 | 
			
		||||
class HMCRunnerModuleFactory
 | 
			
		||||
    : public Factory < HMCModuleBase< QCD::HMCRunnerBase<ReaderClass> > ,	Reader<ReaderClass> > {
 | 
			
		||||
 public:
 | 
			
		||||
 	typedef Reader<ReaderClass> TheReader; 
 | 
			
		||||
 	// use SINGLETON FUNCTOR MACRO HERE
 | 
			
		||||
  HMCRunnerModuleFactory(const HMCRunnerModuleFactory& e) = delete;
 | 
			
		||||
  void operator=(const HMCRunnerModuleFactory& e) = delete;
 | 
			
		||||
  static HMCRunnerModuleFactory& getInstance(void) {
 | 
			
		||||
    static HMCRunnerModuleFactory e;
 | 
			
		||||
    return e;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  HMCRunnerModuleFactory(void) = default;
 | 
			
		||||
  std::string obj_type() const {
 | 
			
		||||
  	return std::string(str);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////
 | 
			
		||||
// macro for these
 | 
			
		||||
 | 
			
		||||
template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass >
 | 
			
		||||
class HMCLeapFrog: public HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::LeapFrog>, ReaderClass >{
 | 
			
		||||
  typedef HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::LeapFrog>, ReaderClass  > HMCBaseMod;
 | 
			
		||||
  using HMCBaseMod::HMCBaseMod;
 | 
			
		||||
 | 
			
		||||
  // aquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::LeapFrog>(this->Par_) );
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass >
 | 
			
		||||
class HMCMinimumNorm2: public HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::MinimumNorm2>, ReaderClass  >{
 | 
			
		||||
  typedef HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::MinimumNorm2>, ReaderClass  > HMCBaseMod;
 | 
			
		||||
  using HMCBaseMod::HMCBaseMod;
 | 
			
		||||
 | 
			
		||||
  // aquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::MinimumNorm2>(this->Par_));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass >
 | 
			
		||||
class HMCForceGradient: public HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::ForceGradient>, ReaderClass  >{
 | 
			
		||||
  typedef HMCModule< QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::ForceGradient>, ReaderClass   > HMCBaseMod;
 | 
			
		||||
  using HMCBaseMod::HMCBaseMod;
 | 
			
		||||
 | 
			
		||||
  // aquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate<ImplementationPolicy, RepresentationPolicy, QCD::ForceGradient>(this->Par_) );
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
extern char hmc_string[];
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,177 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/HMC_GridModules.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef HMC_GRID_MODULES
 | 
			
		||||
#define HMC_GRID_MODULES
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
// Resources
 | 
			
		||||
// Modules for grids
 | 
			
		||||
 | 
			
		||||
// Introduce another namespace HMCModules?
 | 
			
		||||
 | 
			
		||||
class GridModuleParameters: Serializable{
 | 
			
		||||
public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(GridModuleParameters,
 | 
			
		||||
  std::string, lattice,
 | 
			
		||||
  std::string, mpi);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> getLattice() const {return strToVec<int>(lattice);}
 | 
			
		||||
  std::vector<int> getMpi()     const {return strToVec<int>(mpi);}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  void check() const {
 | 
			
		||||
    if (getLattice().size() != getMpi().size() ) {
 | 
			
		||||
      std::cout << GridLogError
 | 
			
		||||
                << "Error in GridModuleParameters: lattice and mpi dimensions "
 | 
			
		||||
                   "do not match"
 | 
			
		||||
                << std::endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  GridModuleParameters(Reader<ReaderClass>& Reader, std::string n = "LatticeGrid"):name(n) {
 | 
			
		||||
    read(Reader, name, *this);
 | 
			
		||||
    check();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Save on file
 | 
			
		||||
  template< class WriterClass>
 | 
			
		||||
  void save(Writer<WriterClass>& Writer){
 | 
			
		||||
    check();
 | 
			
		||||
    write(Writer, name, *this);
 | 
			
		||||
  }
 | 
			
		||||
private:
 | 
			
		||||
    std::string name;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Lower level class
 | 
			
		||||
class GridModule {
 | 
			
		||||
 public:
 | 
			
		||||
  GridCartesian* get_full() {
 | 
			
		||||
    std::cout << GridLogDebug << "Getting cartesian in module"<< std::endl;
 | 
			
		||||
    return grid_.get(); }
 | 
			
		||||
  GridRedBlackCartesian* get_rb() {
 | 
			
		||||
    std::cout << GridLogDebug << "Getting rb-cartesian in module"<< std::endl;
 | 
			
		||||
    return rbgrid_.get(); }
 | 
			
		||||
 | 
			
		||||
  void set_full(GridCartesian* grid) { grid_.reset(grid); }
 | 
			
		||||
  void set_rb(GridRedBlackCartesian* rbgrid) { rbgrid_.reset(rbgrid); }
 | 
			
		||||
  void show_full_decomposition(){ grid_->show_decomposition(); }
 | 
			
		||||
  void show_rb_decomposition(){ rbgrid_->show_decomposition(); }
 | 
			
		||||
 | 
			
		||||
 protected:
 | 
			
		||||
  std::unique_ptr<GridCartesian> grid_;
 | 
			
		||||
  std::unique_ptr<GridRedBlackCartesian> rbgrid_;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////
 | 
			
		||||
// Classes for the user
 | 
			
		||||
////////////////////////////////////
 | 
			
		||||
// Note: the space time grid should be out of the QCD namespace
 | 
			
		||||
template <class vector_type>
 | 
			
		||||
class GridFourDimModule : public GridModule
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  GridFourDimModule()
 | 
			
		||||
  {
 | 
			
		||||
    using namespace QCD;
 | 
			
		||||
    set_full(SpaceTimeGrid::makeFourDimGrid(
 | 
			
		||||
        GridDefaultLatt(), 
 | 
			
		||||
        GridDefaultSimd(4, vector_type::Nsimd()),
 | 
			
		||||
        GridDefaultMpi()));
 | 
			
		||||
    set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get()));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  GridFourDimModule(const std::vector<int> tweak_simd)
 | 
			
		||||
  {
 | 
			
		||||
    using namespace QCD;
 | 
			
		||||
    if (tweak_simd.size() != 4)
 | 
			
		||||
    {
 | 
			
		||||
      std::cout << GridLogError
 | 
			
		||||
                << "Error in GridFourDimModule: SIMD size different from 4" 
 | 
			
		||||
                << std::endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Checks that the product agrees with the expectation
 | 
			
		||||
    int simd_sum = 1;
 | 
			
		||||
    for (auto &n : tweak_simd)
 | 
			
		||||
      simd_sum *= n;
 | 
			
		||||
    std::cout << GridLogDebug << "TweakSIMD: " << tweak_simd << "  Sum: " << simd_sum << std::endl;
 | 
			
		||||
 | 
			
		||||
    if (simd_sum == vector_type::Nsimd())
 | 
			
		||||
    {
 | 
			
		||||
      set_full(SpaceTimeGrid::makeFourDimGrid(
 | 
			
		||||
          GridDefaultLatt(), 
 | 
			
		||||
          tweak_simd, 
 | 
			
		||||
          GridDefaultMpi()));
 | 
			
		||||
      set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get()));
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      std::cout << GridLogError 
 | 
			
		||||
                << "Error in GridFourDimModule: SIMD lanes must sum to " 
 | 
			
		||||
                << vector_type::Nsimd() 
 | 
			
		||||
                << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  GridFourDimModule(const GridModuleParameters Params)
 | 
			
		||||
  {
 | 
			
		||||
    using namespace QCD;
 | 
			
		||||
    std::vector<int> lattice_v = Params.getLattice();
 | 
			
		||||
    std::vector<int> mpi_v = Params.getMpi();
 | 
			
		||||
    if (lattice_v.size() == 4)
 | 
			
		||||
    {
 | 
			
		||||
      set_full(SpaceTimeGrid::makeFourDimGrid(
 | 
			
		||||
          lattice_v, 
 | 
			
		||||
          GridDefaultSimd(4, vector_type::Nsimd()),
 | 
			
		||||
          mpi_v));
 | 
			
		||||
      set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get()));
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      std::cout << GridLogError
 | 
			
		||||
                << "Error in GridFourDimModule: lattice dimension different from 4"
 | 
			
		||||
                << std::endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
typedef GridFourDimModule<vComplex> GridDefaultFourDimModule;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}  // namespace Grid
 | 
			
		||||
 | 
			
		||||
#endif  // HMC_GRID_MODULES
 | 
			
		||||
@@ -1,107 +0,0 @@
 | 
			
		||||
Using HMC in Grid version 0.5.1
 | 
			
		||||
 | 
			
		||||
These are the instructions to use the Generalised HMC on Grid version 0.5.1.
 | 
			
		||||
Disclaimer: GRID is still under active development so any information here can be changed in future releases.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Command line options
 | 
			
		||||
===================
 | 
			
		||||
(relevant file GenericHMCrunner.h)
 | 
			
		||||
The initial configuration can be changed at the command line using 
 | 
			
		||||
--StartType <your choice>
 | 
			
		||||
valid choices, one among these
 | 
			
		||||
HotStart, ColdStart, TepidStart, CheckpointStart
 | 
			
		||||
default: HotStart
 | 
			
		||||
 | 
			
		||||
example
 | 
			
		||||
./My_hmc_exec  --StartType HotStart
 | 
			
		||||
 | 
			
		||||
The CheckpointStart option uses the prefix for the configurations and rng seed files defined in your executable and the initial configuration is specified by
 | 
			
		||||
--StartTrajectory <integer>
 | 
			
		||||
default: 0
 | 
			
		||||
 | 
			
		||||
The number of trajectories for a specific run are specified at command line by
 | 
			
		||||
--Trajectories <integer>
 | 
			
		||||
default: 1
 | 
			
		||||
 | 
			
		||||
The number of thermalization steps (i.e. steps when the Metropolis acceptance check is turned off) is specified by
 | 
			
		||||
--Thermalizations <integer>
 | 
			
		||||
default: 10
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Any other parameter is defined in the source for the executable.
 | 
			
		||||
 | 
			
		||||
HMC controls
 | 
			
		||||
===========
 | 
			
		||||
 | 
			
		||||
The lines 
 | 
			
		||||
 | 
			
		||||
  std::vector<int> SerSeed({1, 2, 3, 4, 5});
 | 
			
		||||
  std::vector<int> ParSeed({6, 7, 8, 9, 10});
 | 
			
		||||
 | 
			
		||||
define the seeds for the serial and the parallel RNG.
 | 
			
		||||
 | 
			
		||||
The line 
 | 
			
		||||
 | 
			
		||||
  TheHMC.MDparameters.set(20, 1.0);// MDsteps, traj length
 | 
			
		||||
 | 
			
		||||
declares the number of molecular dynamics steps and the total trajectory length.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Actions
 | 
			
		||||
======
 | 
			
		||||
 | 
			
		||||
Action names are defined in the file
 | 
			
		||||
lib/qcd/Actions.h
 | 
			
		||||
 | 
			
		||||
Gauge actions list:
 | 
			
		||||
 | 
			
		||||
WilsonGaugeActionR;
 | 
			
		||||
WilsonGaugeActionF;
 | 
			
		||||
WilsonGaugeActionD;
 | 
			
		||||
PlaqPlusRectangleActionR;
 | 
			
		||||
PlaqPlusRectangleActionF;
 | 
			
		||||
PlaqPlusRectangleActionD;
 | 
			
		||||
IwasakiGaugeActionR;
 | 
			
		||||
IwasakiGaugeActionF;
 | 
			
		||||
IwasakiGaugeActionD;
 | 
			
		||||
SymanzikGaugeActionR;
 | 
			
		||||
SymanzikGaugeActionF;
 | 
			
		||||
SymanzikGaugeActionD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
ConjugateWilsonGaugeActionR;
 | 
			
		||||
ConjugateWilsonGaugeActionF;
 | 
			
		||||
ConjugateWilsonGaugeActionD;
 | 
			
		||||
ConjugatePlaqPlusRectangleActionR;
 | 
			
		||||
ConjugatePlaqPlusRectangleActionF;
 | 
			
		||||
ConjugatePlaqPlusRectangleActionD;
 | 
			
		||||
ConjugateIwasakiGaugeActionR;
 | 
			
		||||
ConjugateIwasakiGaugeActionF;
 | 
			
		||||
ConjugateIwasakiGaugeActionD;
 | 
			
		||||
ConjugateSymanzikGaugeActionR;
 | 
			
		||||
ConjugateSymanzikGaugeActionF;
 | 
			
		||||
ConjugateSymanzikGaugeActionD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
ScalarActionR;
 | 
			
		||||
ScalarActionF;
 | 
			
		||||
ScalarActionD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
each of these action accept one single parameter at creation time (beta).
 | 
			
		||||
Example for creating a Symanzik action with beta=4.0
 | 
			
		||||
 | 
			
		||||
	SymanzikGaugeActionR(4.0)
 | 
			
		||||
 | 
			
		||||
The suffixes R,F,D in the action names refer to the Real
 | 
			
		||||
(the precision is defined at compile time by the --enable-precision flag in the configure),
 | 
			
		||||
Float and Double, that force the precision of the action to be 32, 64 bit respectively.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -1,97 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/BaseCheckpointer.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef BASE_CHECKPOINTER
 | 
			
		||||
#define BASE_CHECKPOINTER
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
class CheckpointerParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters, 
 | 
			
		||||
  	std::string, config_prefix, 
 | 
			
		||||
  	std::string, rng_prefix, 
 | 
			
		||||
  	int, saveInterval, 
 | 
			
		||||
  	std::string, format, );
 | 
			
		||||
 | 
			
		||||
  CheckpointerParameters(std::string cf = "cfg", std::string rn = "rng",
 | 
			
		||||
   		      int savemodulo = 1, const std::string &f = "IEEE64BIG")
 | 
			
		||||
      : config_prefix(cf),
 | 
			
		||||
        rng_prefix(rn),
 | 
			
		||||
        saveInterval(savemodulo),
 | 
			
		||||
        format(f){};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass >
 | 
			
		||||
  CheckpointerParameters(Reader<ReaderClass> &Reader) {
 | 
			
		||||
    read(Reader, "Checkpointer", *this);
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Base class for checkpointers
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  void build_filenames(int traj, CheckpointerParameters &Params,
 | 
			
		||||
                       std::string &conf_file, std::string &rng_file) {
 | 
			
		||||
    {
 | 
			
		||||
      std::ostringstream os;
 | 
			
		||||
      os << Params.rng_prefix << "." << traj;
 | 
			
		||||
      rng_file = os.str();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    {
 | 
			
		||||
      std::ostringstream os;
 | 
			
		||||
      os << Params.config_prefix << "." << traj;
 | 
			
		||||
      conf_file = os.str();
 | 
			
		||||
    }
 | 
			
		||||
 	} 
 | 
			
		||||
 | 
			
		||||
  void check_filename(const std::string &filename){
 | 
			
		||||
    std::ifstream f(filename.c_str());
 | 
			
		||||
    if(!f.good()){
 | 
			
		||||
      std::cout << GridLogError << "Filename " << filename << " not found. Aborting. " << std::endl;
 | 
			
		||||
      abort();
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void initialize(const CheckpointerParameters &Params) = 0;
 | 
			
		||||
 | 
			
		||||
  virtual void CheckpointRestore(int traj, typename Impl::Field &U,
 | 
			
		||||
                                 GridSerialRNG &sRNG,
 | 
			
		||||
                                 GridParallelRNG &pRNG) = 0;
 | 
			
		||||
 | 
			
		||||
};  // class BaseHmcCheckpointer
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,116 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/BinaryCheckpointer.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef BINARY_CHECKPOINTER
 | 
			
		||||
#define BINARY_CHECKPOINTER
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <sstream>
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
// Simple checkpointer, only binary file
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class BinaryHmcCheckpointer : public BaseHmcCheckpointer<Impl> {
 | 
			
		||||
 private:
 | 
			
		||||
  CheckpointerParameters Params;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_FIELD_TYPES(Impl);  // Gets the Field type, a Lattice object
 | 
			
		||||
 | 
			
		||||
  // Extract types from the Field
 | 
			
		||||
  typedef typename Field::vector_object vobj;
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
 | 
			
		||||
  typedef typename sobj::DoublePrecision sobj_double;
 | 
			
		||||
 | 
			
		||||
  BinaryHmcCheckpointer(const CheckpointerParameters &Params_) {
 | 
			
		||||
    initialize(Params_);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void initialize(const CheckpointerParameters &Params_) { Params = Params_; }
 | 
			
		||||
 | 
			
		||||
  void truncate(std::string file) {
 | 
			
		||||
    std::ofstream fout(file, std::ios::out);
 | 
			
		||||
    fout.close();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
 | 
			
		||||
 | 
			
		||||
    if ((traj % Params.saveInterval) == 0) {
 | 
			
		||||
      std::string config, rng;
 | 
			
		||||
      this->build_filenames(traj, Params, config, rng);
 | 
			
		||||
 | 
			
		||||
      uint32_t nersc_csum;
 | 
			
		||||
      uint32_t scidac_csuma;
 | 
			
		||||
      uint32_t scidac_csumb;
 | 
			
		||||
      
 | 
			
		||||
      BinarySimpleUnmunger<sobj_double, sobj> munge;
 | 
			
		||||
      truncate(rng);
 | 
			
		||||
      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
      truncate(config);
 | 
			
		||||
 | 
			
		||||
      BinaryIO::writeLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
 | 
			
		||||
						      nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Written Binary Configuration " << config
 | 
			
		||||
                << " checksum " << std::hex 
 | 
			
		||||
		<< nersc_csum   <<"/"
 | 
			
		||||
		<< scidac_csuma   <<"/"
 | 
			
		||||
		<< scidac_csumb 
 | 
			
		||||
		<< std::dec << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
 | 
			
		||||
    std::string config, rng;
 | 
			
		||||
    this->build_filenames(traj, Params, config, rng);
 | 
			
		||||
    this->check_filename(rng);
 | 
			
		||||
    this->check_filename(config);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    BinarySimpleMunger<sobj_double, sobj> munge;
 | 
			
		||||
 | 
			
		||||
    uint32_t nersc_csum;
 | 
			
		||||
    uint32_t scidac_csuma;
 | 
			
		||||
    uint32_t scidac_csumb;
 | 
			
		||||
    BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
    BinaryIO::readLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
 | 
			
		||||
						   nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
    
 | 
			
		||||
    std::cout << GridLogMessage << "Read Binary Configuration " << config
 | 
			
		||||
              << " checksums " << std::hex << nersc_csum<<"/"<<scidac_csuma<<"/"<<scidac_csumb 
 | 
			
		||||
	      << std::dec << std::endl;
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,172 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef CP_MODULES_H
 | 
			
		||||
#define CP_MODULES_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// FIXME  Reorganize QCD namespace 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Checkpoint module, owns the Checkpointer
 | 
			
		||||
////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class ImplementationPolicy>
 | 
			
		||||
class CheckPointerModule: public Parametrized<QCD::CheckpointerParameters>, public HMCModuleBase< QCD::BaseHmcCheckpointer<ImplementationPolicy> >  {
 | 
			
		||||
 public:
 | 
			
		||||
 	std::unique_ptr<QCD::BaseHmcCheckpointer<ImplementationPolicy> > CheckPointPtr;
 | 
			
		||||
 	typedef QCD::CheckpointerParameters APar;
 | 
			
		||||
  typedef HMCModuleBase< QCD::BaseHmcCheckpointer<ImplementationPolicy> > Base;
 | 
			
		||||
  typedef typename Base::Product Product;
 | 
			
		||||
 | 
			
		||||
  CheckPointerModule(APar Par): Parametrized<APar>(Par) {}
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  CheckPointerModule(Reader<ReaderClass>& Reader) : Parametrized<APar>(Reader){};
 | 
			
		||||
 | 
			
		||||
  virtual void print_parameters(){
 | 
			
		||||
  	std::cout << this->Par_ << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Product* getPtr() {
 | 
			
		||||
    if (!CheckPointPtr) initialize();
 | 
			
		||||
 | 
			
		||||
    return CheckPointPtr.get();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  virtual void initialize() = 0;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <char const *str, class ImplementationPolicy, class ReaderClass >
 | 
			
		||||
class HMC_CPModuleFactory
 | 
			
		||||
    : public Factory < HMCModuleBase< QCD::BaseHmcCheckpointer<ImplementationPolicy> > ,	Reader<ReaderClass> > {
 | 
			
		||||
 public:
 | 
			
		||||
 	typedef Reader<ReaderClass> TheReader; 
 | 
			
		||||
 	// use SINGLETON FUNCTOR MACRO HERE
 | 
			
		||||
  HMC_CPModuleFactory(const HMC_CPModuleFactory& e) = delete;
 | 
			
		||||
  void operator=(const HMC_CPModuleFactory& e) = delete;
 | 
			
		||||
  static HMC_CPModuleFactory& getInstance(void) {
 | 
			
		||||
    static HMC_CPModuleFactory e;
 | 
			
		||||
    return e;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  HMC_CPModuleFactory(void) = default;
 | 
			
		||||
  std::string obj_type() const {
 | 
			
		||||
  	return std::string(str);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Concrete classes
 | 
			
		||||
/////////////////////////////////////////////////////////////////////
 | 
			
		||||
namespace QCD{
 | 
			
		||||
 | 
			
		||||
template<class ImplementationPolicy>
 | 
			
		||||
class BinaryCPModule: public CheckPointerModule< ImplementationPolicy> {
 | 
			
		||||
  typedef CheckPointerModule< ImplementationPolicy> CPBase;
 | 
			
		||||
  using CPBase::CPBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->CheckPointPtr.reset(new BinaryHmcCheckpointer<ImplementationPolicy>(this->Par_));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class ImplementationPolicy>
 | 
			
		||||
class NerscCPModule: public CheckPointerModule< ImplementationPolicy> {
 | 
			
		||||
  typedef CheckPointerModule< ImplementationPolicy> CPBase;
 | 
			
		||||
  using CPBase::CPBase; // for constructors inheritance
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
     this->CheckPointPtr.reset(new NerscHmcCheckpointer<ImplementationPolicy>(this->Par_));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_LIME
 | 
			
		||||
  
 | 
			
		||||
template<class ImplementationPolicy>
 | 
			
		||||
class ILDGCPModule: public CheckPointerModule< ImplementationPolicy> {
 | 
			
		||||
  typedef CheckPointerModule< ImplementationPolicy> CPBase;
 | 
			
		||||
  using CPBase::CPBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
     this->CheckPointPtr.reset(new ILDGHmcCheckpointer<ImplementationPolicy>(this->Par_));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class ImplementationPolicy, class Metadata>
 | 
			
		||||
class ScidacCPModule: public CheckPointerModule< ImplementationPolicy> {
 | 
			
		||||
  typedef CheckPointerModule< ImplementationPolicy> CPBase;
 | 
			
		||||
  Metadata M;
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
     this->CheckPointPtr.reset(new ScidacHmcCheckpointer<ImplementationPolicy, Metadata>(this->Par_, M));
 | 
			
		||||
  }
 | 
			
		||||
public:
 | 
			
		||||
  ScidacCPModule(typename CPBase::APar Par, Metadata M_):M(M_), CPBase(Par) {}
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  ScidacCPModule(Reader<ReaderClass>& Reader) : Parametrized<typename CPBase::APar>(Reader), M(Reader){};
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}// QCD temporarily here
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
extern char cp_string[];
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
// use macros?
 | 
			
		||||
static Registrar<QCD::BinaryCPModule<QCD::PeriodicGimplR>, HMC_CPModuleFactory<cp_string, QCD::PeriodicGimplR, XmlReader> > __CPBinarymodXMLInit("Binary");
 | 
			
		||||
static Registrar<QCD::NerscCPModule<QCD::PeriodicGimplR> , HMC_CPModuleFactory<cp_string, QCD::PeriodicGimplR, XmlReader> > __CPNerscmodXMLInit("Nersc");
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_LIME
 | 
			
		||||
static Registrar<QCD::ILDGCPModule<QCD::PeriodicGimplR>  , HMC_CPModuleFactory<cp_string, QCD::PeriodicGimplR, XmlReader> > __CPILDGmodXMLInit("ILDG");
 | 
			
		||||
#endif
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
}// Grid
 | 
			
		||||
#endif //CP_MODULES_H
 | 
			
		||||
@@ -1,41 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef CHECKPOINTERS_H
 | 
			
		||||
#define CHECKPOINTERS_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/hmc/checkpointers/BaseCheckpointer.h>
 | 
			
		||||
#include <Grid/qcd/hmc/checkpointers/NerscCheckpointer.h>
 | 
			
		||||
#include <Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h>
 | 
			
		||||
#include <Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h>
 | 
			
		||||
#include <Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h>
 | 
			
		||||
//#include <Grid/qcd/hmc/checkpointers/CheckPointerModules.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif // CHECKPOINTERS_H
 | 
			
		||||
@@ -1,124 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/ILDGCheckpointer.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef ILDG_CHECKPOINTER
 | 
			
		||||
#define ILDG_CHECKPOINTER
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_LIME
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <sstream>
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
// Only for Gauge fields
 | 
			
		||||
template <class Implementation>
 | 
			
		||||
class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
 | 
			
		||||
 private:
 | 
			
		||||
  CheckpointerParameters Params;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Implementation);
 | 
			
		||||
 | 
			
		||||
  ILDGHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
 | 
			
		||||
 | 
			
		||||
  void initialize(const CheckpointerParameters &Params_) {
 | 
			
		||||
    Params = Params_;
 | 
			
		||||
 | 
			
		||||
    // check here that the format is valid
 | 
			
		||||
    int ieee32big = (Params.format == std::string("IEEE32BIG"));
 | 
			
		||||
    int ieee32    = (Params.format == std::string("IEEE32"));
 | 
			
		||||
    int ieee64big = (Params.format == std::string("IEEE64BIG"));
 | 
			
		||||
    int ieee64    = (Params.format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
    if (!(ieee64big || ieee32 || ieee32big || ieee64)) {
 | 
			
		||||
      std::cout << GridLogError << "Unrecognized file format " << Params.format
 | 
			
		||||
                << std::endl;
 | 
			
		||||
      std::cout << GridLogError
 | 
			
		||||
                << "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
 | 
			
		||||
                << std::endl;
 | 
			
		||||
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
 | 
			
		||||
                          GridParallelRNG &pRNG) {
 | 
			
		||||
    if ((traj % Params.saveInterval) == 0) {
 | 
			
		||||
      std::string config, rng;
 | 
			
		||||
      this->build_filenames(traj, Params, config, rng);
 | 
			
		||||
      GridBase *grid = U._grid;
 | 
			
		||||
      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
      IldgWriter _IldgWriter(grid->IsBoss());
 | 
			
		||||
      _IldgWriter.open(config);
 | 
			
		||||
      _IldgWriter.writeConfiguration(U, traj, config, config);
 | 
			
		||||
      _IldgWriter.close();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Written ILDG Configuration on " << config
 | 
			
		||||
                << " checksum " << std::hex 
 | 
			
		||||
		<< nersc_csum<<"/"
 | 
			
		||||
		<< scidac_csuma<<"/"
 | 
			
		||||
		<< scidac_csumb
 | 
			
		||||
		<< std::dec << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
 | 
			
		||||
                         GridParallelRNG &pRNG) {
 | 
			
		||||
    std::string config, rng;
 | 
			
		||||
    this->build_filenames(traj, Params, config, rng);
 | 
			
		||||
    this->check_filename(rng);
 | 
			
		||||
    this->check_filename(config);
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
    BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    FieldMetaData header;
 | 
			
		||||
    IldgReader _IldgReader;
 | 
			
		||||
    _IldgReader.open(config);
 | 
			
		||||
    _IldgReader.readConfiguration(U,header);  // format from the header
 | 
			
		||||
    _IldgReader.close();
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "Read ILDG Configuration from " << config
 | 
			
		||||
              << " checksum " << std::hex 
 | 
			
		||||
	      << nersc_csum<<"/"
 | 
			
		||||
	      << scidac_csuma<<"/"
 | 
			
		||||
	      << scidac_csumb
 | 
			
		||||
	      << std::dec << std::endl;
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif  // HAVE_LIME
 | 
			
		||||
#endif  // ILDG_CHECKPOINTER
 | 
			
		||||
@@ -1,83 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/NerscCheckpointer.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef NERSC_CHECKPOINTER
 | 
			
		||||
#define NERSC_CHECKPOINTER
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <sstream>
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
// Only for Gauge fields
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
class NerscHmcCheckpointer : public BaseHmcCheckpointer<Gimpl> {
 | 
			
		||||
 private:
 | 
			
		||||
  CheckpointerParameters Params;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Gimpl);  // only for gauge configurations
 | 
			
		||||
 | 
			
		||||
  NerscHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
 | 
			
		||||
 | 
			
		||||
  void initialize(const CheckpointerParameters &Params_) {
 | 
			
		||||
    Params = Params_;
 | 
			
		||||
    Params.format = "IEEE64BIG";  // fixed, overwrite any other choice
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
 | 
			
		||||
                          GridParallelRNG &pRNG) {
 | 
			
		||||
    if ((traj % Params.saveInterval) == 0) {
 | 
			
		||||
      std::string config, rng;
 | 
			
		||||
      this->build_filenames(traj, Params, config, rng);
 | 
			
		||||
 | 
			
		||||
      int precision32 = 1;
 | 
			
		||||
      int tworow = 0;
 | 
			
		||||
      NerscIO::writeRNGState(sRNG, pRNG, rng);
 | 
			
		||||
      NerscIO::writeConfiguration(U, config, tworow, precision32);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
 | 
			
		||||
                         GridParallelRNG &pRNG) {
 | 
			
		||||
    std::string config, rng;
 | 
			
		||||
    this->build_filenames(traj, Params, config, rng);
 | 
			
		||||
    this->check_filename(rng);
 | 
			
		||||
    this->check_filename(config);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    FieldMetaData header;
 | 
			
		||||
    NerscIO::readRNGState(sRNG, pRNG, header, rng);
 | 
			
		||||
    NerscIO::readConfiguration(U, header, config);
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,122 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/ScidacCheckpointer.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2018
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef SCIDAC_CHECKPOINTER
 | 
			
		||||
#define SCIDAC_CHECKPOINTER
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_LIME
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <sstream>
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
// For generic fields
 | 
			
		||||
template <class Implementation, class Metadata>
 | 
			
		||||
class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
 | 
			
		||||
 private:
 | 
			
		||||
  CheckpointerParameters Params;
 | 
			
		||||
  Metadata MData;
 | 
			
		||||
 | 
			
		||||
  typedef typename Implementation::Field Field;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  //INHERIT_GIMPL_TYPES(Implementation);
 | 
			
		||||
 | 
			
		||||
  ScidacHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }
 | 
			
		||||
  ScidacHmcCheckpointer(const CheckpointerParameters &Params_, const Metadata& M_):MData(M_) { initialize(Params_); }
 | 
			
		||||
 | 
			
		||||
  void initialize(const CheckpointerParameters &Params_) {
 | 
			
		||||
    Params = Params_;
 | 
			
		||||
 | 
			
		||||
    // check here that the format is valid
 | 
			
		||||
    int ieee32big = (Params.format == std::string("IEEE32BIG"));
 | 
			
		||||
    int ieee32    = (Params.format == std::string("IEEE32"));
 | 
			
		||||
    int ieee64big = (Params.format == std::string("IEEE64BIG"));
 | 
			
		||||
    int ieee64    = (Params.format == std::string("IEEE64"));
 | 
			
		||||
 | 
			
		||||
    if (!(ieee64big || ieee32 || ieee32big || ieee64)) {
 | 
			
		||||
      std::cout << GridLogError << "Unrecognized file format " << Params.format
 | 
			
		||||
                << std::endl;
 | 
			
		||||
      std::cout << GridLogError
 | 
			
		||||
                << "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
 | 
			
		||||
                << std::endl;
 | 
			
		||||
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
 | 
			
		||||
                          GridParallelRNG &pRNG) {
 | 
			
		||||
    if ((traj % Params.saveInterval) == 0) {
 | 
			
		||||
      std::string config, rng;
 | 
			
		||||
      this->build_filenames(traj, Params, config, rng);
 | 
			
		||||
      GridBase *grid = U._grid;
 | 
			
		||||
      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
      ScidacWriter _ScidacWriter(grid->IsBoss());
 | 
			
		||||
      _ScidacWriter.open(config);
 | 
			
		||||
      _ScidacWriter.writeScidacFieldRecord(U, MData);
 | 
			
		||||
      _ScidacWriter.close();
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG,
 | 
			
		||||
                         GridParallelRNG &pRNG) {
 | 
			
		||||
    std::string config, rng;
 | 
			
		||||
    this->build_filenames(traj, Params, config, rng);
 | 
			
		||||
    this->check_filename(rng);
 | 
			
		||||
    this->check_filename(config);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
			
		||||
    BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
			
		||||
 | 
			
		||||
    Metadata md_content;
 | 
			
		||||
    ScidacReader _ScidacReader;
 | 
			
		||||
    _ScidacReader.open(config);
 | 
			
		||||
    _ScidacReader.readScidacFieldRecord(U,md_content);  // format from the header
 | 
			
		||||
    _ScidacReader.close();
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage << "Read Scidac Configuration from " << config
 | 
			
		||||
              << " checksum " << std::hex 
 | 
			
		||||
	      << nersc_csum<<"/"
 | 
			
		||||
	      << scidac_csuma<<"/"
 | 
			
		||||
	      << scidac_csumb
 | 
			
		||||
	      << std::dec << std::endl;
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif  // HAVE_LIME
 | 
			
		||||
#endif  // ILDG_CHECKPOINTER
 | 
			
		||||
@@ -1,295 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/integrators/Integrator_algorithm.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
//--------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*! @file Integrator_algorithm.h
 | 
			
		||||
 * @brief Declaration of classes for the Molecular Dynamics algorithms
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
//--------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#ifndef INTEGRATOR_ALG_INCLUDED
 | 
			
		||||
#define INTEGRATOR_ALG_INCLUDED
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
/* PAB:
 | 
			
		||||
 *
 | 
			
		||||
 * Recursive leapfrog; explanation of nested stepping
 | 
			
		||||
 *
 | 
			
		||||
 * Nested 1:4; units in dt for top level integrator
 | 
			
		||||
 *
 | 
			
		||||
 * CHROMA                           IroIro
 | 
			
		||||
 *   0        1                      0
 | 
			
		||||
 *  P 1/2                           P 1/2
 | 
			
		||||
 *          P 1/16                                  P1/16
 | 
			
		||||
 *                 U 1/8                                   U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                   U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/16                                  P1/8
 | 
			
		||||
 *  P 1                             P 1
 | 
			
		||||
 *          P 1/16                    * skipped --- avoids revaluating force
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/16                                  P1/8
 | 
			
		||||
 *  P 1                             P 1
 | 
			
		||||
 *          P 1/16                    * skipped
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/16                    * skipped
 | 
			
		||||
 *  P 1                             P 1
 | 
			
		||||
 *          P 1/16                                  P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/8                                   P1/8
 | 
			
		||||
 *                 U 1/8                                 U1/8
 | 
			
		||||
 *          P 1/16                                  P1/16
 | 
			
		||||
 *  P 1/2                            P 1/2
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
template <class FieldImplementation, class SmearingPolicy,
 | 
			
		||||
          class RepresentationPolicy =
 | 
			
		||||
              Representations<FundamentalRepresentation> >
 | 
			
		||||
class LeapFrog : public Integrator<FieldImplementation, SmearingPolicy,
 | 
			
		||||
                                   RepresentationPolicy> {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef LeapFrog<FieldImplementation, SmearingPolicy, RepresentationPolicy>
 | 
			
		||||
      Algorithm;
 | 
			
		||||
  INHERIT_FIELD_TYPES(FieldImplementation);
 | 
			
		||||
 | 
			
		||||
  std::string integrator_name(){return "LeapFrog";}
 | 
			
		||||
 | 
			
		||||
  LeapFrog(GridBase* grid, IntegratorParameters Par,
 | 
			
		||||
           ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm)
 | 
			
		||||
      : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(
 | 
			
		||||
            grid, Par, Aset, Sm){};
 | 
			
		||||
 | 
			
		||||
  void step(Field& U, int level, int _first, int _last) {
 | 
			
		||||
    int fl = this->as.size() - 1;
 | 
			
		||||
    // level  : current level
 | 
			
		||||
    // fl     : final level
 | 
			
		||||
    // eps    : current step size
 | 
			
		||||
 | 
			
		||||
    // Get current level step size
 | 
			
		||||
    RealD eps = this->Params.trajL/this->Params.MDsteps;
 | 
			
		||||
    for (int l = 0; l <= level; ++l) eps /= this->as[l].multiplier;
 | 
			
		||||
 | 
			
		||||
    int multiplier = this->as[level].multiplier;
 | 
			
		||||
    for (int e = 0; e < multiplier; ++e) {
 | 
			
		||||
      int first_step = _first && (e == 0);
 | 
			
		||||
      int last_step = _last && (e == multiplier - 1);
 | 
			
		||||
 | 
			
		||||
      if (first_step) {  // initial half step
 | 
			
		||||
        this->update_P(U, level, eps / 2.0);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      if (level == fl) {  // lowest level
 | 
			
		||||
        this->update_U(U, eps);
 | 
			
		||||
      } else {  // recursive function call
 | 
			
		||||
        this->step(U, level + 1, first_step, last_step);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      int mm = last_step ? 1 : 2;
 | 
			
		||||
      this->update_P(U, level, mm * eps / 2.0);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class FieldImplementation, class SmearingPolicy,
 | 
			
		||||
          class RepresentationPolicy =
 | 
			
		||||
              Representations<FundamentalRepresentation> >
 | 
			
		||||
class MinimumNorm2 : public Integrator<FieldImplementation, SmearingPolicy,
 | 
			
		||||
                                       RepresentationPolicy> {
 | 
			
		||||
 private:
 | 
			
		||||
  const RealD lambda = 0.1931833275037836;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_FIELD_TYPES(FieldImplementation);
 | 
			
		||||
 | 
			
		||||
  MinimumNorm2(GridBase* grid, IntegratorParameters Par,
 | 
			
		||||
               ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm)
 | 
			
		||||
      : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(
 | 
			
		||||
            grid, Par, Aset, Sm){};
 | 
			
		||||
 | 
			
		||||
  std::string integrator_name(){return "MininumNorm2";}
 | 
			
		||||
 | 
			
		||||
  void step(Field& U, int level, int _first, int _last) {
 | 
			
		||||
    // level  : current level
 | 
			
		||||
    // fl     : final level
 | 
			
		||||
    // eps    : current step size
 | 
			
		||||
 | 
			
		||||
    int fl = this->as.size() - 1;
 | 
			
		||||
 | 
			
		||||
    RealD eps = this->Params.trajL/this->Params.MDsteps * 2.0;
 | 
			
		||||
    for (int l = 0; l <= level; ++l) eps /= 2.0 * this->as[l].multiplier;
 | 
			
		||||
 | 
			
		||||
    // Nesting:  2xupdate_U of size eps/2
 | 
			
		||||
    // Next level is eps/2/multiplier
 | 
			
		||||
 | 
			
		||||
    int multiplier = this->as[level].multiplier;
 | 
			
		||||
    for (int e = 0; e < multiplier; ++e) {  // steps per step
 | 
			
		||||
 | 
			
		||||
      int first_step = _first && (e == 0);
 | 
			
		||||
      int last_step = _last && (e == multiplier - 1);
 | 
			
		||||
 | 
			
		||||
      if (first_step) {  // initial half step
 | 
			
		||||
        this->update_P(U, level, lambda * eps);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      if (level == fl) {  // lowest level
 | 
			
		||||
        this->update_U(U, 0.5 * eps);
 | 
			
		||||
      } else {  // recursive function call
 | 
			
		||||
        this->step(U, level + 1, first_step, 0);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      this->update_P(U, level, (1.0 - 2.0 * lambda) * eps);
 | 
			
		||||
 | 
			
		||||
      if (level == fl) {  // lowest level
 | 
			
		||||
        this->update_U(U, 0.5 * eps);
 | 
			
		||||
      } else {  // recursive function call
 | 
			
		||||
        this->step(U, level + 1, 0, last_step);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      int mm = (last_step) ? 1 : 2;
 | 
			
		||||
      this->update_P(U, level, lambda * eps * mm);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class FieldImplementation, class SmearingPolicy,
 | 
			
		||||
          class RepresentationPolicy =
 | 
			
		||||
              Representations<FundamentalRepresentation> >
 | 
			
		||||
class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
 | 
			
		||||
                                        RepresentationPolicy> {
 | 
			
		||||
 private:
 | 
			
		||||
  const RealD lambda = 1.0 / 6.0;
 | 
			
		||||
  ;
 | 
			
		||||
  const RealD chi = 1.0 / 72.0;
 | 
			
		||||
  const RealD xi = 0.0;
 | 
			
		||||
  const RealD theta = 0.0;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_FIELD_TYPES(FieldImplementation);
 | 
			
		||||
 | 
			
		||||
  // Looks like dH scales as dt^4. tested wilson/wilson 2 level.
 | 
			
		||||
  ForceGradient(GridBase* grid, IntegratorParameters Par,
 | 
			
		||||
                ActionSet<Field, RepresentationPolicy>& Aset,
 | 
			
		||||
                SmearingPolicy& Sm)
 | 
			
		||||
      : Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>(
 | 
			
		||||
            grid, Par, Aset, Sm){};
 | 
			
		||||
 | 
			
		||||
  std::string integrator_name(){return "ForceGradient";}
 | 
			
		||||
  
 | 
			
		||||
  void FG_update_P(Field& U, int level, double fg_dt, double ep) {
 | 
			
		||||
    Field Ufg(U._grid);
 | 
			
		||||
    Field Pfg(U._grid);
 | 
			
		||||
    Ufg = U;
 | 
			
		||||
    Pfg = zero;
 | 
			
		||||
    std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep
 | 
			
		||||
              << std::endl;
 | 
			
		||||
    // prepare_fg; no prediction/result cache for now
 | 
			
		||||
    // could relax CG stopping conditions for the
 | 
			
		||||
    // derivatives in the small step since the force gets multiplied by
 | 
			
		||||
    // a tiny dt^2 term relative to main force.
 | 
			
		||||
    //
 | 
			
		||||
    // Presently 4 force evals, and should have 3, so 1.33x too expensive.
 | 
			
		||||
    // could reduce this with sloppy CG to perhaps 1.15x too expensive
 | 
			
		||||
    // even without prediction.
 | 
			
		||||
    this->update_P(Pfg, Ufg, level, 1.0);
 | 
			
		||||
    this->update_U(Pfg, Ufg, fg_dt);
 | 
			
		||||
    this->update_P(Ufg, level, ep);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void step(Field& U, int level, int _first, int _last) {
 | 
			
		||||
    RealD eps = this->Params.trajL/this->Params.MDsteps * 2.0;
 | 
			
		||||
    for (int l = 0; l <= level; ++l) eps /= 2.0 * this->as[l].multiplier;
 | 
			
		||||
 | 
			
		||||
    RealD Chi = chi * eps * eps * eps;
 | 
			
		||||
 | 
			
		||||
    int fl = this->as.size() - 1;
 | 
			
		||||
 | 
			
		||||
    int multiplier = this->as[level].multiplier;
 | 
			
		||||
 | 
			
		||||
    for (int e = 0; e < multiplier; ++e) {  // steps per step
 | 
			
		||||
 | 
			
		||||
      int first_step = _first && (e == 0);
 | 
			
		||||
      int last_step = _last && (e == multiplier - 1);
 | 
			
		||||
 | 
			
		||||
      if (first_step) {  // initial half step
 | 
			
		||||
        this->update_P(U, level, lambda * eps);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      if (level == fl) {  // lowest level
 | 
			
		||||
        this->update_U(U, 0.5 * eps);
 | 
			
		||||
      } else {  // recursive function call
 | 
			
		||||
        this->step(U, level + 1, first_step, 0);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps),
 | 
			
		||||
                        (1.0 - 2.0 * lambda) * eps);
 | 
			
		||||
 | 
			
		||||
      if (level == fl) {  // lowest level
 | 
			
		||||
        this->update_U(U, 0.5 * eps);
 | 
			
		||||
      } else {  // recursive function call
 | 
			
		||||
        this->step(U, level + 1, 0, last_step);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      int mm = (last_step) ? 1 : 2;
 | 
			
		||||
      this->update_P(U, level, lambda * eps * mm);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif  // INTEGRATOR_INCLUDED
 | 
			
		||||
@@ -1,517 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/ActionModules.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef ACTION_MODULES_H
 | 
			
		||||
#define ACTION_MODULES_H
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
Define loadable, serializable modules
 | 
			
		||||
for the HMC execution
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////
 | 
			
		||||
//              Actions
 | 
			
		||||
//////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class Product, class R>
 | 
			
		||||
class ActionModuleBase: public HMCModuleBase<Product>{
 | 
			
		||||
public:
 | 
			
		||||
  typedef R Resource;
 | 
			
		||||
  virtual void acquireResource(R& ){};
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class ActionType, class APar>
 | 
			
		||||
class ActionModule
 | 
			
		||||
    : public Parametrized<APar>,
 | 
			
		||||
      public ActionModuleBase< QCD::Action<typename ActionType::GaugeField> , QCD::GridModule > {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef ActionModuleBase< QCD::Action<typename ActionType::GaugeField>, QCD::GridModule > Base;
 | 
			
		||||
  typedef typename Base::Product Product;
 | 
			
		||||
  typedef APar Parameters;
 | 
			
		||||
 | 
			
		||||
  std::unique_ptr<ActionType> ActionPtr;
 | 
			
		||||
 | 
			
		||||
  ActionModule(APar Par) : Parametrized<APar>(Par) {}
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  ActionModule(Reader<ReaderClass>& Reader) : Parametrized<APar>(Reader){};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  virtual void print_parameters(){
 | 
			
		||||
    Parametrized<APar>::print_parameters();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Product* getPtr() {
 | 
			
		||||
    if (!ActionPtr) initialize();
 | 
			
		||||
 | 
			
		||||
    return ActionPtr.get();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  virtual void initialize() = 0;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
//////////////////////////
 | 
			
		||||
// Modules
 | 
			
		||||
//////////////////////////
 | 
			
		||||
 | 
			
		||||
namespace QCD{
 | 
			
		||||
 | 
			
		||||
class PlaqPlusRectangleGaugeActionParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(PlaqPlusRectangleGaugeActionParameters, 
 | 
			
		||||
    RealD, c_plaq,
 | 
			
		||||
    RealD, c_rect);
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class RBCGaugeActionParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(RBCGaugeActionParameters, 
 | 
			
		||||
    RealD, beta,
 | 
			
		||||
    RealD, c1);
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class BetaGaugeActionParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(BetaGaugeActionParameters, 
 | 
			
		||||
    RealD, beta);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class WilsonGModule: public ActionModule<WilsonGaugeAction<Impl>, BetaGaugeActionParameters> {
 | 
			
		||||
  typedef ActionModule<WilsonGaugeAction<Impl>, BetaGaugeActionParameters> ActionBase;
 | 
			
		||||
  using ActionBase::ActionBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ActionPtr.reset(new WilsonGaugeAction<Impl>(this->Par_.beta));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class PlaqPlusRectangleGModule: public ActionModule<PlaqPlusRectangleAction<Impl>, PlaqPlusRectangleGaugeActionParameters> {
 | 
			
		||||
  typedef ActionModule<PlaqPlusRectangleAction<Impl>, PlaqPlusRectangleGaugeActionParameters> ActionBase;
 | 
			
		||||
  using ActionBase::ActionBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ActionPtr.reset(new PlaqPlusRectangleAction<Impl>(this->Par_.c_plaq, this->Par_.c_rect));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class RBCGModule: public ActionModule<RBCGaugeAction<Impl>, RBCGaugeActionParameters> {
 | 
			
		||||
  typedef ActionModule<RBCGaugeAction<Impl>, RBCGaugeActionParameters> ActionBase;
 | 
			
		||||
  using ActionBase::ActionBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ActionPtr.reset(new RBCGaugeAction<Impl>(this->Par_.beta, this->Par_.c1));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class SymanzikGModule: public ActionModule<SymanzikGaugeAction<Impl>, BetaGaugeActionParameters> {
 | 
			
		||||
  typedef ActionModule<SymanzikGaugeAction<Impl>, BetaGaugeActionParameters> ActionBase;
 | 
			
		||||
  using ActionBase::ActionBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ActionPtr.reset(new SymanzikGaugeAction<Impl>(this->Par_.beta));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class IwasakiGModule: public ActionModule<IwasakiGaugeAction<Impl>, BetaGaugeActionParameters> {
 | 
			
		||||
  typedef ActionModule<IwasakiGaugeAction<Impl>, BetaGaugeActionParameters> ActionBase;
 | 
			
		||||
  using ActionBase::ActionBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ActionPtr.reset(new IwasakiGaugeAction<Impl>(this->Par_.beta));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class DBW2GModule: public ActionModule<DBW2GaugeAction<Impl>, BetaGaugeActionParameters> {
 | 
			
		||||
  typedef ActionModule<DBW2GaugeAction<Impl>, BetaGaugeActionParameters> ActionBase;
 | 
			
		||||
  using ActionBase::ActionBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ActionPtr.reset(new DBW2GaugeAction<Impl>(this->Par_.beta));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////
 | 
			
		||||
// Fermion Actions
 | 
			
		||||
/////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl, template <typename> class FermionA, class Params = NoParameters >
 | 
			
		||||
class PseudoFermionModuleBase: public ActionModule<FermionA<Impl>, Params> {
 | 
			
		||||
protected:
 | 
			
		||||
  typedef ActionModule<FermionA<Impl>, Params> ActionBase;
 | 
			
		||||
  using ActionBase::ActionBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  typedef std::unique_ptr<FermionOperatorModuleBase<FermionOperator<Impl>> > operator_type;
 | 
			
		||||
  typedef std::unique_ptr<HMCModuleBase<OperatorFunction<typename Impl::FermionField> > > solver_type;
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  void getFermionOperator(Reader<ReaderClass>& Reader, operator_type &fo, std::string section_name){
 | 
			
		||||
    auto &FOFactory = HMC_FermionOperatorModuleFactory<fermionop_string, Impl, ReaderClass>::getInstance();
 | 
			
		||||
    Reader.push(section_name);
 | 
			
		||||
    std::string op_name;
 | 
			
		||||
    read(Reader,"name", op_name);
 | 
			
		||||
    fo = FOFactory.create(op_name, Reader);
 | 
			
		||||
    Reader.pop();  
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  void getSolverOperator(Reader<ReaderClass>& Reader, solver_type &so, std::string section_name){
 | 
			
		||||
    auto& SolverFactory = HMC_SolverModuleFactory<solver_string, typename Impl::FermionField, ReaderClass>::getInstance();
 | 
			
		||||
    Reader.push(section_name);
 | 
			
		||||
    std::string solv_name;
 | 
			
		||||
    read(Reader,"name", solv_name);
 | 
			
		||||
    so = SolverFactory.create(solv_name, Reader);
 | 
			
		||||
    Reader.pop();    
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class TwoFlavourFModule: public PseudoFermionModuleBase<Impl, TwoFlavourPseudoFermionAction>{
 | 
			
		||||
  typedef PseudoFermionModuleBase<Impl, TwoFlavourPseudoFermionAction> Base;
 | 
			
		||||
  using Base::Base;
 | 
			
		||||
 | 
			
		||||
  typename Base::operator_type fop_mod;
 | 
			
		||||
  typename Base::solver_type   solver_mod;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void acquireResource(typename Base::Resource& GridMod){
 | 
			
		||||
    fop_mod->AddGridPair(GridMod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
   // constructor
 | 
			
		||||
   template <class ReaderClass>
 | 
			
		||||
   TwoFlavourFModule(Reader<ReaderClass>& R): Base(R) {
 | 
			
		||||
    this->getSolverOperator(R, solver_mod, "Solver");
 | 
			
		||||
    this->getFermionOperator(R, fop_mod, "Operator");
 | 
			
		||||
   } 
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize() {
 | 
			
		||||
    // here temporarily assuming that the force and action solver are the same
 | 
			
		||||
    this->ActionPtr.reset(new TwoFlavourPseudoFermionAction<Impl>(*(this->fop_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr())));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// very similar, I could have templated this but it is overkilling
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class TwoFlavourEOFModule: public PseudoFermionModuleBase<Impl, TwoFlavourEvenOddPseudoFermionAction>{
 | 
			
		||||
  typedef PseudoFermionModuleBase<Impl, TwoFlavourEvenOddPseudoFermionAction> Base;
 | 
			
		||||
  using Base::Base;
 | 
			
		||||
 | 
			
		||||
  typename Base::operator_type fop_mod;
 | 
			
		||||
  typename Base::solver_type   solver_mod;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void acquireResource(typename Base::Resource& GridMod){
 | 
			
		||||
    fop_mod->AddGridPair(GridMod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
   // constructor
 | 
			
		||||
   template <class ReaderClass>
 | 
			
		||||
   TwoFlavourEOFModule(Reader<ReaderClass>& R): PseudoFermionModuleBase<Impl, TwoFlavourEvenOddPseudoFermionAction>(R) {
 | 
			
		||||
    this->getSolverOperator(R, solver_mod, "Solver");
 | 
			
		||||
    this->getFermionOperator(R, fop_mod, "Operator");
 | 
			
		||||
   } 
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize() {
 | 
			
		||||
    // here temporarily assuming that the force and action solver are the same
 | 
			
		||||
    this->ActionPtr.reset(new TwoFlavourEvenOddPseudoFermionAction<Impl>(*(this->fop_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr())));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class TwoFlavourRatioFModule: public PseudoFermionModuleBase<Impl, TwoFlavourRatioPseudoFermionAction>{
 | 
			
		||||
  typedef PseudoFermionModuleBase<Impl, TwoFlavourRatioPseudoFermionAction> Base;
 | 
			
		||||
  using Base::Base;
 | 
			
		||||
 | 
			
		||||
  typename Base::operator_type fop_numerator_mod;
 | 
			
		||||
  typename Base::operator_type fop_denominator_mod;
 | 
			
		||||
  typename Base::solver_type   solver_mod;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void acquireResource(typename Base::Resource& GridMod){
 | 
			
		||||
    fop_numerator_mod->AddGridPair(GridMod);
 | 
			
		||||
    fop_denominator_mod->AddGridPair(GridMod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
   // constructor
 | 
			
		||||
   template <class ReaderClass>
 | 
			
		||||
   TwoFlavourRatioFModule(Reader<ReaderClass>& R): PseudoFermionModuleBase<Impl, TwoFlavourRatioPseudoFermionAction>(R) {
 | 
			
		||||
    this->getSolverOperator(R, solver_mod, "Solver");
 | 
			
		||||
    this->getFermionOperator(R, fop_numerator_mod, "Numerator");
 | 
			
		||||
    this->getFermionOperator(R, fop_denominator_mod, "Denominator");
 | 
			
		||||
   } 
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize() {
 | 
			
		||||
    // here temporarily assuming that the force and action solver are the same
 | 
			
		||||
    this->ActionPtr.reset(new TwoFlavourRatioPseudoFermionAction<Impl>(*(this->fop_numerator_mod->getPtr()), 
 | 
			
		||||
      *(this->fop_denominator_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr())));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class TwoFlavourRatioEOFModule: public PseudoFermionModuleBase<Impl, TwoFlavourEvenOddRatioPseudoFermionAction>{
 | 
			
		||||
  typedef PseudoFermionModuleBase<Impl, TwoFlavourEvenOddRatioPseudoFermionAction> Base;
 | 
			
		||||
  using Base::Base;
 | 
			
		||||
 | 
			
		||||
  typename Base::operator_type fop_numerator_mod;
 | 
			
		||||
  typename Base::operator_type fop_denominator_mod;
 | 
			
		||||
  typename Base::solver_type   solver_mod;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void acquireResource(typename Base::Resource& GridMod){
 | 
			
		||||
    fop_numerator_mod->AddGridPair(GridMod);
 | 
			
		||||
    fop_denominator_mod->AddGridPair(GridMod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
   // constructor
 | 
			
		||||
   template <class ReaderClass>
 | 
			
		||||
   TwoFlavourRatioEOFModule(Reader<ReaderClass>& R): Base(R) {
 | 
			
		||||
    this->getSolverOperator(R, solver_mod, "Solver");
 | 
			
		||||
    this->getFermionOperator(R, fop_numerator_mod, "Numerator");
 | 
			
		||||
    this->getFermionOperator(R, fop_denominator_mod, "Denominator");
 | 
			
		||||
   } 
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize() {
 | 
			
		||||
    // here temporarily assuming that the force and action solver are the same
 | 
			
		||||
    this->ActionPtr.reset(new TwoFlavourEvenOddRatioPseudoFermionAction<Impl>(*(this->fop_numerator_mod->getPtr()), 
 | 
			
		||||
      *(this->fop_denominator_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr())));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class OneFlavourFModule: public PseudoFermionModuleBase<Impl, OneFlavourRationalPseudoFermionAction, OneFlavourRationalParams>{
 | 
			
		||||
  typedef PseudoFermionModuleBase<Impl, OneFlavourRationalPseudoFermionAction, OneFlavourRationalParams> Base;
 | 
			
		||||
  using Base::Base;
 | 
			
		||||
 | 
			
		||||
  typename Base::operator_type fop_mod;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void acquireResource(typename Base::Resource& GridMod){
 | 
			
		||||
    fop_mod->AddGridPair(GridMod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
   // constructor
 | 
			
		||||
   template <class ReaderClass>
 | 
			
		||||
   OneFlavourFModule(Reader<ReaderClass>& R): Base(R) {
 | 
			
		||||
    this->getFermionOperator(R, fop_mod, "Operator");
 | 
			
		||||
   } 
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize() {
 | 
			
		||||
    this->ActionPtr.reset(new OneFlavourRationalPseudoFermionAction<Impl>(*(this->fop_mod->getPtr()), this->Par_ ));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class OneFlavourEOFModule: 
 | 
			
		||||
  public PseudoFermionModuleBase<Impl, OneFlavourEvenOddRationalPseudoFermionAction, OneFlavourRationalParams>
 | 
			
		||||
  {
 | 
			
		||||
  typedef PseudoFermionModuleBase<Impl, OneFlavourEvenOddRationalPseudoFermionAction, OneFlavourRationalParams> Base;
 | 
			
		||||
  using Base::Base;
 | 
			
		||||
 | 
			
		||||
  typename Base::operator_type fop_mod;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void acquireResource(typename Base::Resource& GridMod){
 | 
			
		||||
    fop_mod->AddGridPair(GridMod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
   // constructor
 | 
			
		||||
   template <class ReaderClass>
 | 
			
		||||
   OneFlavourEOFModule(Reader<ReaderClass>& R): Base(R) {
 | 
			
		||||
    this->getFermionOperator(R, fop_mod, "Operator");
 | 
			
		||||
   } 
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize() {
 | 
			
		||||
    this->ActionPtr.reset(new OneFlavourEvenOddRationalPseudoFermionAction<Impl>(*(this->fop_mod->getPtr()), this->Par_ ));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class OneFlavourRatioFModule: 
 | 
			
		||||
  public PseudoFermionModuleBase<Impl, OneFlavourRatioRationalPseudoFermionAction, OneFlavourRationalParams>
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
  typedef PseudoFermionModuleBase<Impl, OneFlavourRatioRationalPseudoFermionAction, OneFlavourRationalParams> Base;
 | 
			
		||||
  using Base::Base;
 | 
			
		||||
 | 
			
		||||
  typename Base::operator_type fop_numerator_mod;
 | 
			
		||||
  typename Base::operator_type fop_denominator_mod;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void acquireResource(typename Base::Resource& GridMod){
 | 
			
		||||
    fop_numerator_mod->AddGridPair(GridMod);
 | 
			
		||||
    fop_denominator_mod->AddGridPair(GridMod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
   // constructor
 | 
			
		||||
   template <class ReaderClass>
 | 
			
		||||
   OneFlavourRatioFModule(Reader<ReaderClass>& R): Base(R) {
 | 
			
		||||
    this->getFermionOperator(R, fop_numerator_mod, "Numerator");
 | 
			
		||||
    this->getFermionOperator(R, fop_denominator_mod, "Denominator");
 | 
			
		||||
   } 
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize() {
 | 
			
		||||
    this->ActionPtr.reset(new OneFlavourRatioRationalPseudoFermionAction<Impl>( *(this->fop_numerator_mod->getPtr()), 
 | 
			
		||||
                                                                                *(this->fop_denominator_mod->getPtr()), 
 | 
			
		||||
                                                                                this->Par_ ));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl >
 | 
			
		||||
class OneFlavourRatioEOFModule: 
 | 
			
		||||
  public PseudoFermionModuleBase<Impl, OneFlavourEvenOddRatioRationalPseudoFermionAction, OneFlavourRationalParams>
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
  typedef PseudoFermionModuleBase<Impl, OneFlavourEvenOddRatioRationalPseudoFermionAction, OneFlavourRationalParams> Base;
 | 
			
		||||
  using Base::Base;
 | 
			
		||||
 | 
			
		||||
  typename Base::operator_type fop_numerator_mod;
 | 
			
		||||
  typename Base::operator_type fop_denominator_mod;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void acquireResource(typename Base::Resource& GridMod){
 | 
			
		||||
    fop_numerator_mod->AddGridPair(GridMod);
 | 
			
		||||
    fop_denominator_mod->AddGridPair(GridMod);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
   // constructor
 | 
			
		||||
   template <class ReaderClass>
 | 
			
		||||
   OneFlavourRatioEOFModule(Reader<ReaderClass>& R): Base(R) {
 | 
			
		||||
    this->getFermionOperator(R, fop_numerator_mod, "Numerator");
 | 
			
		||||
    this->getFermionOperator(R, fop_denominator_mod, "Denominator");
 | 
			
		||||
   } 
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize() {
 | 
			
		||||
    this->ActionPtr.reset(new OneFlavourEvenOddRatioRationalPseudoFermionAction<Impl>(*(this->fop_numerator_mod->getPtr()), 
 | 
			
		||||
                                                                                      *(this->fop_denominator_mod->getPtr()), 
 | 
			
		||||
                                                                                      this->Par_ ));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}// QCD temporarily here
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////
 | 
			
		||||
// Factories specialisations
 | 
			
		||||
////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// use the same classed defined by Antonin, does not make sense to rewrite
 | 
			
		||||
// Factory is perfectly fine
 | 
			
		||||
// Registar must be changed because I do not want to use the ModuleFactory
 | 
			
		||||
 | 
			
		||||
// explicit ref to LatticeGaugeField must be changed or put in the factory
 | 
			
		||||
//typedef ActionModuleBase< QCD::Action< QCD::LatticeGaugeField >, QCD::GridModule > HMC_LGTActionModBase;
 | 
			
		||||
//typedef ActionModuleBase< QCD::Action< QCD::LatticeReal >, QCD::GridModule > HMC_ScalarActionModBase;
 | 
			
		||||
 | 
			
		||||
template <char const *str, class Field, class ReaderClass >
 | 
			
		||||
class HMC_ActionModuleFactory
 | 
			
		||||
    : public Factory < ActionModuleBase< QCD::Action< Field >, QCD::GridModule > , Reader<ReaderClass> > {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef Reader<ReaderClass> TheReader; 
 | 
			
		||||
  // use SINGLETON FUNCTOR MACRO HERE
 | 
			
		||||
  HMC_ActionModuleFactory(const HMC_ActionModuleFactory& e) = delete;
 | 
			
		||||
  void operator=(const HMC_ActionModuleFactory& e) = delete;
 | 
			
		||||
  static HMC_ActionModuleFactory& getInstance(void) {
 | 
			
		||||
    static HMC_ActionModuleFactory e;
 | 
			
		||||
    return e;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  HMC_ActionModuleFactory(void) = default;
 | 
			
		||||
    std::string obj_type() const {
 | 
			
		||||
        return std::string(str);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
extern char gauge_string[];
 | 
			
		||||
} // Grid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif //HMC_MODULES_H
 | 
			
		||||
@@ -1,109 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
Source file: Factory.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef Factory_hpp_
 | 
			
		||||
#define Factory_hpp_
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/******************************************************************************
 | 
			
		||||
 *                        abstract factory class                              *
 | 
			
		||||
 ******************************************************************************/
 | 
			
		||||
template <typename T, typename CreatorInput>
 | 
			
		||||
class Factory
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
    typedef std::function< std::unique_ptr<T> (const CreatorInput&) > Func;
 | 
			
		||||
 | 
			
		||||
    // constructor
 | 
			
		||||
    Factory(void) = default;
 | 
			
		||||
    // destructor
 | 
			
		||||
    virtual ~Factory(void) = default;
 | 
			
		||||
    // registration
 | 
			
		||||
    void registerBuilder(const std::string type, const Func &f);
 | 
			
		||||
    // get builder list
 | 
			
		||||
    std::vector<std::string> getBuilderList(void) const;
 | 
			
		||||
    // factory
 | 
			
		||||
    std::unique_ptr<T> create(const std::string type,
 | 
			
		||||
              								const CreatorInput& input) const;
 | 
			
		||||
private:
 | 
			
		||||
    std::map<std::string, Func> builder_;
 | 
			
		||||
    virtual std::string obj_type() const = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/******************************************************************************
 | 
			
		||||
 *                         template implementation                            *
 | 
			
		||||
 ******************************************************************************/
 | 
			
		||||
// registration ////////////////////////////////////////////////////////////////
 | 
			
		||||
template <typename T, typename CreatorInput>
 | 
			
		||||
void Factory<T, CreatorInput>::registerBuilder(const std::string type, const Func &f)
 | 
			
		||||
{
 | 
			
		||||
    builder_[type] = f;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// get module list /////////////////////////////////////////////////////////////
 | 
			
		||||
template <typename T, typename CreatorInput>
 | 
			
		||||
std::vector<std::string> Factory<T, CreatorInput>::getBuilderList(void) const
 | 
			
		||||
{
 | 
			
		||||
    std::vector<std::string> list;
 | 
			
		||||
    
 | 
			
		||||
    for (auto &b: builder_)
 | 
			
		||||
    {
 | 
			
		||||
        list.push_back(b.first);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return list;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// factory /////////////////////////////////////////////////////////////////////
 | 
			
		||||
template <typename T, typename CreatorInput>
 | 
			
		||||
std::unique_ptr<T> Factory<T, CreatorInput>::create(const std::string type,
 | 
			
		||||
                                      							const CreatorInput& input) const
 | 
			
		||||
{
 | 
			
		||||
    Func func;
 | 
			
		||||
    
 | 
			
		||||
    std::cout << GridLogDebug << "Creating object of type "<< type << std::endl;
 | 
			
		||||
    try
 | 
			
		||||
    {
 | 
			
		||||
        func = builder_.at(type);
 | 
			
		||||
    }
 | 
			
		||||
    catch (std::out_of_range &)
 | 
			
		||||
    {
 | 
			
		||||
      //HADRONS_ERROR("object of type '" + type + "' unknown");
 | 
			
		||||
    	std::cout << GridLogError << "Error" << std::endl;
 | 
			
		||||
    	std::cout << GridLogError << obj_type() << " object of name [" << type << "] unknown" << std::endl;
 | 
			
		||||
    	exit(1);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return func(input);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif // Factory_hpp_
 | 
			
		||||
@@ -1,243 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/FermionOperatorModules.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef FERMIONOPERATOR_MODULES_H
 | 
			
		||||
#define FERMIONOPERATOR_MODULES_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////
 | 
			
		||||
//  Fermion operators
 | 
			
		||||
/////////////////////////////////////
 | 
			
		||||
template < class Product>
 | 
			
		||||
class FermionOperatorModuleBase : public HMCModuleBase<Product>{
 | 
			
		||||
public:
 | 
			
		||||
  virtual void AddGridPair(QCD::GridModule&) = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <template <typename> class FOType, class FermionImpl, class FOPar>
 | 
			
		||||
class FermionOperatorModule
 | 
			
		||||
    : public Parametrized<FOPar>,
 | 
			
		||||
      public FermionOperatorModuleBase<QCD::FermionOperator<FermionImpl> > {
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
  std::unique_ptr< FOType<FermionImpl> > FOPtr;
 | 
			
		||||
  std::vector< QCD::GridModule* >    GridRefs;
 | 
			
		||||
 public:
 | 
			
		||||
  typedef HMCModuleBase< QCD::FermionOperator<FermionImpl> > Base;
 | 
			
		||||
  typedef typename Base::Product Product;
 | 
			
		||||
 | 
			
		||||
  FermionOperatorModule(FOPar Par) : Parametrized<FOPar>(Par) {}
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  FermionOperatorModule(Reader<ReaderClass>& Reader) : Parametrized<FOPar>(Reader){};
 | 
			
		||||
 | 
			
		||||
  void AddGridPair(QCD::GridModule &Mod){
 | 
			
		||||
    if (GridRefs.size()>1){
 | 
			
		||||
      std::cout << GridLogError << "Adding too many Grids to the FermionOperatorModule" << std::endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
    GridRefs.push_back(&Mod);
 | 
			
		||||
 | 
			
		||||
    if (Ls()){
 | 
			
		||||
      GridRefs.push_back(new QCD::GridModule());
 | 
			
		||||
      GridRefs[1]->set_full(QCD::SpaceTimeGrid::makeFiveDimGrid(Ls(),GridRefs[0]->get_full()));
 | 
			
		||||
      GridRefs[1]->set_rb(QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls(),GridRefs[0]->get_full()));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual unsigned int Ls(){
 | 
			
		||||
    return 0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void print_parameters(){
 | 
			
		||||
    std::cout << this->Par_ << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Product* getPtr() {
 | 
			
		||||
    if (!FOPtr) initialize();
 | 
			
		||||
 | 
			
		||||
    return FOPtr.get();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  virtual void initialize() = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Factory
 | 
			
		||||
template <char const *str, class FermionImpl, class ReaderClass >
 | 
			
		||||
class HMC_FermionOperatorModuleFactory
 | 
			
		||||
    : public Factory < FermionOperatorModuleBase<QCD::FermionOperator<FermionImpl> > ,  Reader<ReaderClass> > {
 | 
			
		||||
 public:
 | 
			
		||||
  // use SINGLETON FUNCTOR MACRO HERE
 | 
			
		||||
  typedef Reader<ReaderClass> TheReader;
 | 
			
		||||
 | 
			
		||||
  HMC_FermionOperatorModuleFactory(const HMC_FermionOperatorModuleFactory& e) = delete;
 | 
			
		||||
  void operator=(const HMC_FermionOperatorModuleFactory& e) = delete;
 | 
			
		||||
  static HMC_FermionOperatorModuleFactory& getInstance(void) {
 | 
			
		||||
    static HMC_FermionOperatorModuleFactory e;
 | 
			
		||||
    return e;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  HMC_FermionOperatorModuleFactory(void) = default;
 | 
			
		||||
    std::string obj_type() const {
 | 
			
		||||
        return std::string(str);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
extern char fermionop_string[];
 | 
			
		||||
namespace QCD{
 | 
			
		||||
 | 
			
		||||
// Modules
 | 
			
		||||
class WilsonFermionParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonFermionParameters,
 | 
			
		||||
    RealD, mass);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class FermionImpl >
 | 
			
		||||
class WilsonFermionModule: public FermionOperatorModule<WilsonFermion, FermionImpl, WilsonFermionParameters> {
 | 
			
		||||
  typedef FermionOperatorModule<WilsonFermion, FermionImpl, WilsonFermionParameters> FermBase;
 | 
			
		||||
  using FermBase::FermBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    auto GridMod = this->GridRefs[0];
 | 
			
		||||
    typename FermionImpl::GaugeField U(GridMod->get_full());
 | 
			
		||||
    this->FOPtr.reset(new WilsonFermion<FermionImpl>(U, *(GridMod->get_full()), *(GridMod->get_rb()), this->Par_.mass));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MobiusFermionParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(MobiusFermionParameters,
 | 
			
		||||
    RealD, mass,
 | 
			
		||||
    RealD, M5,
 | 
			
		||||
    RealD, b,
 | 
			
		||||
    RealD, c,
 | 
			
		||||
    unsigned int, Ls);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class FermionImpl >
 | 
			
		||||
class MobiusFermionModule: public FermionOperatorModule<MobiusFermion, FermionImpl, MobiusFermionParameters> {
 | 
			
		||||
  typedef FermionOperatorModule<MobiusFermion, FermionImpl, MobiusFermionParameters> FermBase;
 | 
			
		||||
  using FermBase::FermBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  virtual unsigned int Ls(){
 | 
			
		||||
    return this->Par_.Ls;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    auto GridMod = this->GridRefs[0];
 | 
			
		||||
    auto GridMod5d = this->GridRefs[1];
 | 
			
		||||
    typename FermionImpl::GaugeField U(GridMod->get_full());
 | 
			
		||||
    this->FOPtr.reset(new MobiusFermion<FermionImpl>( U, *(GridMod->get_full()), *(GridMod->get_rb()),
 | 
			
		||||
                                                      *(GridMod5d->get_full()), *(GridMod5d->get_rb()),
 | 
			
		||||
                                                      this->Par_.mass, this->Par_.M5, this->Par_.b, this->Par_.c));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DomainWallFermionParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(DomainWallFermionParameters,
 | 
			
		||||
    RealD, mass,
 | 
			
		||||
    RealD, M5,
 | 
			
		||||
    unsigned int, Ls);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class FermionImpl >
 | 
			
		||||
class DomainWallFermionModule: public FermionOperatorModule<DomainWallFermion, FermionImpl, DomainWallFermionParameters> {
 | 
			
		||||
  typedef FermionOperatorModule<DomainWallFermion, FermionImpl, DomainWallFermionParameters> FermBase;
 | 
			
		||||
  using FermBase::FermBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  virtual unsigned int Ls(){
 | 
			
		||||
    return this->Par_.Ls;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    auto GridMod = this->GridRefs[0];
 | 
			
		||||
    auto GridMod5d = this->GridRefs[1];
 | 
			
		||||
    typename FermionImpl::GaugeField U(GridMod->get_full());
 | 
			
		||||
    this->FOPtr.reset(new DomainWallFermion<FermionImpl>( U, *(GridMod->get_full()), *(GridMod->get_rb()),
 | 
			
		||||
                                                      *(GridMod5d->get_full()), *(GridMod5d->get_rb()),
 | 
			
		||||
                                                      this->Par_.mass, this->Par_.M5));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DomainWallEOFAFermionParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(DomainWallEOFAFermionParameters,
 | 
			
		||||
    RealD, mq1,
 | 
			
		||||
    RealD, mq2,
 | 
			
		||||
    RealD, mq3,
 | 
			
		||||
    RealD, shift,
 | 
			
		||||
    int, pm,
 | 
			
		||||
    RealD, M5,
 | 
			
		||||
    unsigned int, Ls);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class FermionImpl >
 | 
			
		||||
class DomainWallEOFAFermionModule: public FermionOperatorModule<DomainWallEOFAFermion, FermionImpl, DomainWallEOFAFermionParameters> {
 | 
			
		||||
  typedef FermionOperatorModule<DomainWallEOFAFermion, FermionImpl, DomainWallEOFAFermionParameters> FermBase;
 | 
			
		||||
  using FermBase::FermBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  virtual unsigned int Ls(){
 | 
			
		||||
    return this->Par_.Ls;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    auto GridMod = this->GridRefs[0];
 | 
			
		||||
    auto GridMod5d = this->GridRefs[1];
 | 
			
		||||
    typename FermionImpl::GaugeField U(GridMod->get_full());
 | 
			
		||||
    this->FOPtr.reset(new DomainWallEOFAFermion<FermionImpl>( U, *(GridMod->get_full()), *(GridMod->get_rb()),
 | 
			
		||||
                                                      *(GridMod5d->get_full()), *(GridMod5d->get_rb()),
 | 
			
		||||
                                                      this->Par_.mq1, this->Par_.mq2, this->Par_.mq3,
 | 
			
		||||
                                                      this->Par_.shift, this->Par_.pm, this->Par_.M5));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
} // QCD
 | 
			
		||||
} // Grid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif //FERMIONOPERATOR_MODULES_H
 | 
			
		||||
@@ -1,39 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/Modules.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
 | 
			
		||||
char gauge_string[]      = "gauge";
 | 
			
		||||
char cp_string[]         = "CheckPointer";
 | 
			
		||||
char hmc_string[]        = "HMC";
 | 
			
		||||
char observable_string[] = "Observable";
 | 
			
		||||
char solver_string[]     = "Solver";
 | 
			
		||||
char fermionop_string[]  = "FermionOperator";
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -1,130 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef HMC_MODULES_H
 | 
			
		||||
#define HMC_MODULES_H
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
Define loadable, serializable modules
 | 
			
		||||
for the HMC execution
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
// Empty class for no parameters
 | 
			
		||||
class NoParameters{};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
Base class for modules with parameters
 | 
			
		||||
*/
 | 
			
		||||
template < class P >
 | 
			
		||||
class Parametrized{
 | 
			
		||||
public:
 | 
			
		||||
  typedef P Parameters;
 | 
			
		||||
 | 
			
		||||
  Parametrized(Parameters Par):Par_(Par){};
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  Parametrized(Reader<ReaderClass> & R, std::string section_name = "parameters"){
 | 
			
		||||
    read(R, section_name, Par_);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void set_parameters(Parameters Par){
 | 
			
		||||
        Par_ = Par;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void print_parameters(){
 | 
			
		||||
    std::cout << Par_ << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
  Parameters Par_;
 | 
			
		||||
private:
 | 
			
		||||
  std::string section_name;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
class Parametrized<NoParameters>{
 | 
			
		||||
        public:
 | 
			
		||||
  typedef NoParameters Parameters;
 | 
			
		||||
 | 
			
		||||
  Parametrized(Parameters Par){};
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  Parametrized(Reader<ReaderClass> & Reader){};
 | 
			
		||||
 | 
			
		||||
  void set_parameters(Parameters Par){}
 | 
			
		||||
 | 
			
		||||
  void print_parameters(){}
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////
 | 
			
		||||
// Lowest level abstract module class
 | 
			
		||||
////////////////////////////////////////
 | 
			
		||||
template <class Prod>
 | 
			
		||||
class HMCModuleBase {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef Prod Product;
 | 
			
		||||
 | 
			
		||||
  virtual Prod* getPtr() = 0;
 | 
			
		||||
 | 
			
		||||
  // add a getReference? 
 | 
			
		||||
  
 | 
			
		||||
  virtual void print_parameters(){};  // default to nothing
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////
 | 
			
		||||
// Registration class
 | 
			
		||||
/////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class T, class TheFactory>
 | 
			
		||||
class Registrar {
 | 
			
		||||
 public:
 | 
			
		||||
  Registrar(std::string className) {
 | 
			
		||||
    // register the class factory function
 | 
			
		||||
    TheFactory::getInstance().registerBuilder(className, 
 | 
			
		||||
        [&](typename TheFactory::TheReader Reader)
 | 
			
		||||
        { 
 | 
			
		||||
          return std::unique_ptr<T>(new T(Reader));
 | 
			
		||||
        }
 | 
			
		||||
        );
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif //HMC_MODULES_H
 | 
			
		||||
@@ -1,158 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/ObservableModules.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef HMC_OBSERVABLE_MODULES_H
 | 
			
		||||
#define HMC_OBSERVABLE_MODULES_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
/////////////////////////////
 | 
			
		||||
// Observables
 | 
			
		||||
/////////////////////////////
 | 
			
		||||
template <class ObservableType, class OPar>
 | 
			
		||||
class ObservableModule
 | 
			
		||||
    : public Parametrized<OPar>,
 | 
			
		||||
      public HMCModuleBase< QCD::HmcObservable<typename ObservableType::Field> > {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef HMCModuleBase< QCD::HmcObservable< typename ObservableType::Field> > Base;
 | 
			
		||||
  typedef typename Base::Product Product;
 | 
			
		||||
  typedef OPar Parameters;
 | 
			
		||||
 | 
			
		||||
  std::unique_ptr<ObservableType> ObservablePtr;
 | 
			
		||||
 | 
			
		||||
  ObservableModule(OPar Par) : Parametrized<OPar>(Par) {}
 | 
			
		||||
 | 
			
		||||
  virtual void print_parameters(){
 | 
			
		||||
    Parametrized<OPar>::print_parameters();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  ObservableModule(Reader<ReaderClass>& Reader) : Parametrized<OPar>(Reader){};
 | 
			
		||||
 | 
			
		||||
  Product* getPtr() {
 | 
			
		||||
    if (!ObservablePtr) initialize();
 | 
			
		||||
 | 
			
		||||
    return ObservablePtr.get();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  virtual void initialize() = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////
 | 
			
		||||
// Modules
 | 
			
		||||
////////////////
 | 
			
		||||
 | 
			
		||||
namespace QCD{
 | 
			
		||||
 | 
			
		||||
//// Observables module
 | 
			
		||||
class PlaquetteObsParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(PlaquetteObsParameters, 
 | 
			
		||||
    std::string, output_prefix);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template < class Impl >
 | 
			
		||||
class PlaquetteMod: public ObservableModule<PlaquetteLogger<Impl>, NoParameters>{
 | 
			
		||||
  typedef ObservableModule<PlaquetteLogger<Impl>, NoParameters> ObsBase;
 | 
			
		||||
  using ObsBase::ObsBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ObservablePtr.reset(new PlaquetteLogger<Impl>());
 | 
			
		||||
  }
 | 
			
		||||
  public:
 | 
			
		||||
  PlaquetteMod(): ObsBase(NoParameters()){}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template < class Impl >
 | 
			
		||||
class PolyakovMod: public ObservableModule<PolyakovLogger<Impl>, NoParameters>{
 | 
			
		||||
  typedef ObservableModule<PolyakovLogger<Impl>, NoParameters> ObsBase;
 | 
			
		||||
  using ObsBase::ObsBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ObservablePtr.reset(new PolyakovLogger<Impl>());
 | 
			
		||||
  }
 | 
			
		||||
  public:
 | 
			
		||||
  PolyakovMod(): ObsBase(NoParameters()){}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template < class Impl >
 | 
			
		||||
class TopologicalChargeMod: public ObservableModule<TopologicalCharge<Impl>, TopologyObsParameters>{
 | 
			
		||||
  typedef ObservableModule<TopologicalCharge<Impl>, TopologyObsParameters> ObsBase;
 | 
			
		||||
  using ObsBase::ObsBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->ObservablePtr.reset(new TopologicalCharge<Impl>(this->Par_));
 | 
			
		||||
  }
 | 
			
		||||
  public:
 | 
			
		||||
  TopologicalChargeMod(TopologyObsParameters Par): ObsBase(Par){}
 | 
			
		||||
  TopologicalChargeMod(): ObsBase(){}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}// QCD temporarily here
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////
 | 
			
		||||
// Factories specialisations
 | 
			
		||||
////////////////////////////////////////
 | 
			
		||||
// explicit ref to LatticeGaugeField must be changed or put in the factory
 | 
			
		||||
//typedef HMCModuleBase< QCD::HmcObservable<QCD::LatticeGaugeField> > HMC_ObsModBase;
 | 
			
		||||
 | 
			
		||||
template <char const *str, class Field, class ReaderClass >
 | 
			
		||||
class HMC_ObservablesModuleFactory
 | 
			
		||||
    : public Factory < HMCModuleBase< QCD::HmcObservable<Field> >, Reader<ReaderClass> > {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef Reader<ReaderClass> TheReader; 
 | 
			
		||||
  // use SINGLETON FUNCTOR MACRO HERE
 | 
			
		||||
  HMC_ObservablesModuleFactory(const HMC_ObservablesModuleFactory& e) = delete;
 | 
			
		||||
  void operator=(const HMC_ObservablesModuleFactory& e) = delete;
 | 
			
		||||
  static HMC_ObservablesModuleFactory& getInstance(void) {
 | 
			
		||||
    static HMC_ObservablesModuleFactory e;
 | 
			
		||||
    return e;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  HMC_ObservablesModuleFactory(void) = default;
 | 
			
		||||
    std::string obj_type() const {
 | 
			
		||||
    return std::string(str);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
extern char observable_string[];
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif //HMC_OBSERVABLE_MODULES_H
 | 
			
		||||
@@ -1,129 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/Registration.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef MODULES_REGISTRATION_H
 | 
			
		||||
#define MODULES_REGISTRATION_H
 | 
			
		||||
 | 
			
		||||
// simplify with macros
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Actions
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
typedef QCD::WilsonGModule<ImplementationPolicy> WilsonGMod;
 | 
			
		||||
typedef QCD::SymanzikGModule<ImplementationPolicy> SymanzikGMod;
 | 
			
		||||
typedef QCD::IwasakiGModule<ImplementationPolicy> IwasakiGMod;
 | 
			
		||||
typedef QCD::DBW2GModule<ImplementationPolicy> DBW2GMod;
 | 
			
		||||
typedef QCD::RBCGModule<ImplementationPolicy> RBCGMod;
 | 
			
		||||
typedef QCD::PlaqPlusRectangleGModule<ImplementationPolicy> PlaqPlusRectangleGMod;
 | 
			
		||||
 | 
			
		||||
static Registrar<QCD::WilsonGMod,            HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __WGmodXMLInit("Wilson"); 
 | 
			
		||||
static Registrar<QCD::SymanzikGMod,          HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __SymGmodXMLInit("Symanzik"); 
 | 
			
		||||
static Registrar<QCD::IwasakiGMod,           HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __IwGmodXMLInit("Iwasaki"); 
 | 
			
		||||
static Registrar<QCD::DBW2GMod,              HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __DBW2GmodXMLInit("DBW2"); 
 | 
			
		||||
static Registrar<QCD::RBCGMod,               HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __RBCGmodXMLInit("RBC"); 
 | 
			
		||||
static Registrar<QCD::PlaqPlusRectangleGMod, HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __PPRectGmodXMLInit("PlaqPlusRect"); 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// FIXME more general implementation
 | 
			
		||||
static Registrar<QCD::TwoFlavourFModule<FermionImplementationPolicy> ,      
 | 
			
		||||
	HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __TwoFlavourFmodXMLInit("TwoFlavours"); 
 | 
			
		||||
static Registrar<QCD::TwoFlavourRatioFModule<FermionImplementationPolicy> , 
 | 
			
		||||
	HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __TwoFlavourRatioFmodXMLInit("TwoFlavoursRatio"); 
 | 
			
		||||
static Registrar<QCD::TwoFlavourEOFModule<FermionImplementationPolicy> ,    
 | 
			
		||||
	HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __TwoFlavourEOFmodXMLInit("TwoFlavoursEvenOdd"); 
 | 
			
		||||
static Registrar<QCD::TwoFlavourRatioEOFModule<FermionImplementationPolicy>,
 | 
			
		||||
	HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __TwoFlavourRatioEOFmodXMLInit("TwoFlavoursEvenOddRatio"); 
 | 
			
		||||
static Registrar<QCD::OneFlavourFModule<FermionImplementationPolicy> ,      
 | 
			
		||||
	HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __OneFlavourFmodXMLInit("OneFlavour"); 
 | 
			
		||||
static Registrar<QCD::OneFlavourEOFModule<FermionImplementationPolicy> ,    
 | 
			
		||||
	HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __OneFlavourEOFmodXMLInit("OneFlavourEvenOdd"); 
 | 
			
		||||
static Registrar<QCD::OneFlavourRatioFModule<FermionImplementationPolicy> , 
 | 
			
		||||
	HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __OneFlavourRatioFmodXMLInit("OneFlavourRatio"); 
 | 
			
		||||
static Registrar<QCD::OneFlavourRatioEOFModule<FermionImplementationPolicy>,
 | 
			
		||||
	HMC_ActionModuleFactory<gauge_string, typename ImplementationPolicy::Field, Serialiser> > __OneFlavourRatioEOFmodXMLInit("OneFlavourEvenOddRatio"); 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Solvers
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Now a specific registration with a fermion field
 | 
			
		||||
// here must instantiate CG and CR for every new fermion field type (macro!!)
 | 
			
		||||
 | 
			
		||||
static Registrar< ConjugateGradientModule<QCD::WilsonFermionR::FermionField>,   
 | 
			
		||||
                  HMC_SolverModuleFactory<solver_string, QCD::WilsonFermionR::FermionField, Serialiser> > __CGWFmodXMLInit("ConjugateGradient"); 
 | 
			
		||||
static Registrar< ConjugateResidualModule<QCD::WilsonFermionR::FermionField>,   
 | 
			
		||||
                  HMC_SolverModuleFactory<solver_string, QCD::WilsonFermionR::FermionField, Serialiser> > __CRWFmodXMLInit("ConjugateResidual"); 
 | 
			
		||||
 | 
			
		||||
// add the staggered, scalar versions here
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Fermion operators
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
static Registrar< QCD::WilsonFermionModule<FermionImplementationPolicy>,   
 | 
			
		||||
                  HMC_FermionOperatorModuleFactory<fermionop_string, FermionImplementationPolicy, Serialiser> > __WilsonFOPmodXMLInit("Wilson"); 
 | 
			
		||||
static Registrar< QCD::MobiusFermionModule<FermionImplementationPolicy>,   
 | 
			
		||||
                  HMC_FermionOperatorModuleFactory<fermionop_string, FermionImplementationPolicy, Serialiser> > __MobiusFOPmodXMLInit("Mobius");
 | 
			
		||||
static Registrar< QCD::DomainWallFermionModule<FermionImplementationPolicy>,   
 | 
			
		||||
                  HMC_FermionOperatorModuleFactory<fermionop_string, FermionImplementationPolicy, Serialiser> > __DWFOPmodXMLInit("DomainWall");
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Observables
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
static Registrar<QCD::PlaquetteMod<ImplementationPolicy>, HMC_ObservablesModuleFactory<observable_string, typename ImplementationPolicy::Field, Serialiser> > __OBSPLmodXMLInit("Plaquette"); 
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Checkpointers
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
static Registrar<QCD::BinaryCPModule<ImplementationPolicy>, HMC_CPModuleFactory<cp_string, ImplementationPolicy, Serialiser> > __CPBinarymodXMLInit("Binary");
 | 
			
		||||
static Registrar<QCD::NerscCPModule<ImplementationPolicy> , HMC_CPModuleFactory<cp_string, ImplementationPolicy, Serialiser> > __CPNerscmodXMLInit("Nersc");
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_LIME
 | 
			
		||||
static Registrar<QCD::ILDGCPModule<ImplementationPolicy>  , HMC_CPModuleFactory<cp_string, ImplementationPolicy, Serialiser> > __CPILDGmodXMLInit("ILDG");
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Integrators
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
static Registrar< HMCLeapFrog<ImplementationPolicy, RepresentationPolicy, Serialiser>      , HMCRunnerModuleFactory<hmc_string, Serialiser> > __HMCLFmodXMLInit("LeapFrog");
 | 
			
		||||
static Registrar< HMCMinimumNorm2<ImplementationPolicy, RepresentationPolicy, Serialiser>  , HMCRunnerModuleFactory<hmc_string, Serialiser> > __HMCMN2modXMLInit("MinimumNorm2");
 | 
			
		||||
static Registrar< HMCForceGradient<ImplementationPolicy, RepresentationPolicy, Serialiser> , HMCRunnerModuleFactory<hmc_string, Serialiser> > __HMCFGmodXMLInit("ForceGradient");
 | 
			
		||||
 | 
			
		||||
typedef HMCRunnerModuleFactory<hmc_string, Serialiser > HMCModuleFactory;
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,138 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/SolverModules.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef SOLVER_MODULES_H
 | 
			
		||||
#define SOLVER_MODULES_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////
 | 
			
		||||
//       Operator Functions (Solvers)
 | 
			
		||||
//////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <template <typename> class SolverType, class Field, class SPar>
 | 
			
		||||
class SolverModule
 | 
			
		||||
    : public Parametrized<SPar>,
 | 
			
		||||
      public HMCModuleBase<OperatorFunction<Field> > {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef HMCModuleBase< OperatorFunction<Field> > Base;
 | 
			
		||||
  typedef typename Base::Product Product;
 | 
			
		||||
 | 
			
		||||
  std::unique_ptr< SolverType<Field> > SolverPtr;
 | 
			
		||||
 | 
			
		||||
  SolverModule(SPar Par) : Parametrized<SPar>(Par) {}
 | 
			
		||||
 | 
			
		||||
  template <class ReaderClass>
 | 
			
		||||
  SolverModule(Reader<ReaderClass>& Reader) : Parametrized<SPar>(Reader){};
 | 
			
		||||
 | 
			
		||||
  virtual void print_parameters(){
 | 
			
		||||
    std::cout << this->Par_ << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Product* getPtr() {
 | 
			
		||||
    if (!SolverPtr) initialize();
 | 
			
		||||
 | 
			
		||||
    return SolverPtr.get();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  virtual void initialize() = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Factory
 | 
			
		||||
template <char const *str, class Field, class ReaderClass >
 | 
			
		||||
class HMC_SolverModuleFactory
 | 
			
		||||
    : public Factory < HMCModuleBase<OperatorFunction<Field> > ,  Reader<ReaderClass> > {
 | 
			
		||||
 public:
 | 
			
		||||
  // use SINGLETON FUNCTOR MACRO HERE
 | 
			
		||||
  typedef Reader<ReaderClass> TheReader; 
 | 
			
		||||
 | 
			
		||||
  HMC_SolverModuleFactory(const HMC_SolverModuleFactory& e) = delete;
 | 
			
		||||
  void operator=(const HMC_SolverModuleFactory& e) = delete;
 | 
			
		||||
  static HMC_SolverModuleFactory& getInstance(void) {
 | 
			
		||||
    static HMC_SolverModuleFactory e;
 | 
			
		||||
    return e;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  HMC_SolverModuleFactory(void) = default;
 | 
			
		||||
    std::string obj_type() const {
 | 
			
		||||
        return std::string(str);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SolverParameters : Serializable {
 | 
			
		||||
 public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(SolverParameters,
 | 
			
		||||
    RealD, tolerance,
 | 
			
		||||
    RealD, max_iterations);
 | 
			
		||||
  // add error on no convergence?
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SolverObjName: Serializable {
 | 
			
		||||
public:
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(SolverObjName, 
 | 
			
		||||
  std::string, name,
 | 
			
		||||
  SolverParameters, parameters);
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Field >
 | 
			
		||||
class ConjugateGradientModule: public SolverModule<ConjugateGradient, Field, SolverParameters> {
 | 
			
		||||
  typedef SolverModule<ConjugateGradient, Field, SolverParameters> SolverBase;
 | 
			
		||||
  using SolverBase::SolverBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->SolverPtr.reset(new ConjugateGradient<Field>(this->Par_.tolerance, this->Par_.max_iterations, true));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class Field >
 | 
			
		||||
class ConjugateResidualModule: public SolverModule<ConjugateResidual, Field, SolverParameters> {
 | 
			
		||||
  typedef SolverModule<ConjugateResidual, Field, SolverParameters> SolverBase;
 | 
			
		||||
  using SolverBase::SolverBase; // for constructors
 | 
			
		||||
 | 
			
		||||
  // acquire resource
 | 
			
		||||
  virtual void initialize(){
 | 
			
		||||
    this->SolverPtr.reset(new ConjugateResidual<Field>(this->Par_.tolerance, this->Par_.max_iterations));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
extern char solver_string[];
 | 
			
		||||
} // Grid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif //SOLVER_MODULES_H
 | 
			
		||||
@@ -1,46 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef MODS_H
 | 
			
		||||
#define MODS_H
 | 
			
		||||
 | 
			
		||||
// Modules files
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/modules/Factory.h>
 | 
			
		||||
#include <Grid/qcd/modules/Modules.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/hmc/checkpointers/CheckPointerModules.h>
 | 
			
		||||
#include <Grid/qcd/modules/SolverModules.h>
 | 
			
		||||
#include <Grid/qcd/modules/FermionOperatorModules.h>
 | 
			
		||||
#include <Grid/qcd/modules/ActionModules.h>
 | 
			
		||||
#include <Grid/qcd/modules/ObservableModules.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif //MODS_H
 | 
			
		||||
@@ -1,51 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/observables/hmc_observable.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef HMC_OBSERVABLE_H
 | 
			
		||||
#define HMC_OBSERVABLE_H
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
 | 
			
		||||
template <class Field>
 | 
			
		||||
class HmcObservable {
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void TrajectoryComplete(int traj,
 | 
			
		||||
                                  Field &U,
 | 
			
		||||
                                  GridSerialRNG &sRNG,
 | 
			
		||||
                                  GridParallelRNG &pRNG) = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}  // namespace Grid
 | 
			
		||||
 | 
			
		||||
#include "plaquette.h"
 | 
			
		||||
#include "topological_charge.h"
 | 
			
		||||
#include "polyakov_loop.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif  //  HMC_OBSERVABLE_H
 | 
			
		||||
@@ -1,68 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/plaquette.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef HMC_PLAQUETTE_H
 | 
			
		||||
#define HMC_PLAQUETTE_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
// this is only defined for a gauge theory
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class PlaquetteLogger : public HmcObservable<typename Impl::Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  // here forces the Impl to be of gauge fields
 | 
			
		||||
  // if not the compiler will complain
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
  // necessary for HmcObservable compatibility
 | 
			
		||||
  typedef typename Impl::Field Field;
 | 
			
		||||
 | 
			
		||||
  void TrajectoryComplete(int traj,
 | 
			
		||||
                          Field &U,
 | 
			
		||||
                          GridSerialRNG &sRNG,
 | 
			
		||||
                          GridParallelRNG &pRNG) {
 | 
			
		||||
 | 
			
		||||
    RealD plaq = WilsonLoops<Impl>::avgPlaquette(U);
 | 
			
		||||
 | 
			
		||||
    int def_prec = std::cout.precision();
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage
 | 
			
		||||
        << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
 | 
			
		||||
        << "Plaquette: [ " << traj << " ] "<< plaq << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout.precision(def_prec);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}  // namespace QCD
 | 
			
		||||
}  // namespace Grid
 | 
			
		||||
 | 
			
		||||
#endif  // HMC_PLAQUETTE_H
 | 
			
		||||
@@ -1,68 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/polyakov_line.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: David Preti <david.preti@csic.es>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef HMC_POLYAKOV_H
 | 
			
		||||
#define HMC_POLYAKOV_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
// this is only defined for a gauge theory
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class PolyakovLogger : public HmcObservable<typename Impl::Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  // here forces the Impl to be of gauge fields
 | 
			
		||||
  // if not the compiler will complain
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
  // necessary for HmcObservable compatibility
 | 
			
		||||
  typedef typename Impl::Field Field;
 | 
			
		||||
 | 
			
		||||
  void TrajectoryComplete(int traj,
 | 
			
		||||
                          Field &U,
 | 
			
		||||
                          GridSerialRNG &sRNG,
 | 
			
		||||
                          GridParallelRNG &pRNG) {
 | 
			
		||||
 | 
			
		||||
    ComplexD polyakov = WilsonLoops<Impl>::avgPolyakovLoop(U);
 | 
			
		||||
 | 
			
		||||
    int def_prec = std::cout.precision();
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage
 | 
			
		||||
        << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
 | 
			
		||||
        << "Polyakov Loop: [ " << traj << " ] "<< polyakov << std::endl;
 | 
			
		||||
 | 
			
		||||
    std::cout.precision(def_prec);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}  // namespace QCD
 | 
			
		||||
}  // namespace Grid
 | 
			
		||||
 | 
			
		||||
#endif  // HMC_POLYAKOV_H
 | 
			
		||||
@@ -1,122 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/topological_charge.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef HMC_TOP_CHARGE_H
 | 
			
		||||
#define HMC_TOP_CHARGE_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
struct TopologySmearingParameters : Serializable {
 | 
			
		||||
    GRID_SERIALIZABLE_CLASS_MEMBERS(TopologySmearingParameters,
 | 
			
		||||
    int, steps,
 | 
			
		||||
    float, step_size,
 | 
			
		||||
    int, meas_interval,
 | 
			
		||||
    float, maxTau);
 | 
			
		||||
 | 
			
		||||
    TopologySmearingParameters(int s = 0, float ss = 0.0f, int mi = 0, float mT = 0.0f):
 | 
			
		||||
        steps(s), step_size(ss), meas_interval(mi), maxTau(mT){}
 | 
			
		||||
 | 
			
		||||
    template < class ReaderClass >
 | 
			
		||||
    TopologySmearingParameters(Reader<ReaderClass>& Reader){
 | 
			
		||||
        read(Reader, "Smearing", *this);  
 | 
			
		||||
    }  
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
struct TopologyObsParameters : Serializable {
 | 
			
		||||
    GRID_SERIALIZABLE_CLASS_MEMBERS(TopologyObsParameters,
 | 
			
		||||
      int, interval,
 | 
			
		||||
      bool, do_smearing,
 | 
			
		||||
      TopologySmearingParameters, Smearing);  
 | 
			
		||||
 | 
			
		||||
    TopologyObsParameters(int interval = 1, bool smearing = false):
 | 
			
		||||
        interval(interval), Smearing(smearing){}
 | 
			
		||||
 | 
			
		||||
    template <class ReaderClass >
 | 
			
		||||
      TopologyObsParameters(Reader<ReaderClass>& Reader){
 | 
			
		||||
        read(Reader, "TopologyMeasurement", *this);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// this is only defined for a gauge theory
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class TopologicalCharge : public HmcObservable<typename Impl::Field> {
 | 
			
		||||
    TopologyObsParameters Pars;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
    // here forces the Impl to be of gauge fields
 | 
			
		||||
    // if not the compiler will complain
 | 
			
		||||
    INHERIT_GIMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
    // necessary for HmcObservable compatibility
 | 
			
		||||
    typedef typename Impl::Field Field;
 | 
			
		||||
 | 
			
		||||
    TopologicalCharge(int interval = 1, bool do_smearing = false):
 | 
			
		||||
        Pars(interval, do_smearing){}
 | 
			
		||||
    
 | 
			
		||||
    TopologicalCharge(TopologyObsParameters P):Pars(P){
 | 
			
		||||
        std::cout << GridLogDebug << "Creating TopologicalCharge " << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void TrajectoryComplete(int traj,
 | 
			
		||||
                            Field &U,
 | 
			
		||||
                            GridSerialRNG &sRNG,
 | 
			
		||||
                            GridParallelRNG &pRNG) {
 | 
			
		||||
 | 
			
		||||
    if (traj%Pars.interval == 0){
 | 
			
		||||
        // Smearing
 | 
			
		||||
        Field Usmear = U;
 | 
			
		||||
        int def_prec = std::cout.precision();
 | 
			
		||||
        
 | 
			
		||||
        if (Pars.do_smearing){
 | 
			
		||||
            // using wilson flow by default here
 | 
			
		||||
            WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
 | 
			
		||||
            WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
 | 
			
		||||
            Real T0   = WF.energyDensityPlaquette(Usmear);
 | 
			
		||||
            std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
 | 
			
		||||
                      << "T0                : [ " << traj << " ] "<< T0 << std::endl;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Real q    = WilsonLoops<Impl>::TopologicalCharge(Usmear);
 | 
			
		||||
        std::cout << GridLogMessage
 | 
			
		||||
            << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
 | 
			
		||||
            << "Topological Charge: [ " << traj << " ] "<< q << std::endl;
 | 
			
		||||
 | 
			
		||||
        std::cout.precision(def_prec);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif  //  HMC_TOP_CHARGE_H
 | 
			
		||||
@@ -1,44 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/plaquette.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
/*
 | 
			
		||||
  @brief Declares base smearing class Smear
 | 
			
		||||
 */
 | 
			
		||||
#ifndef BASE_SMEAR_
 | 
			
		||||
#define BASE_SMEAR_
 | 
			
		||||
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
class Smear{
 | 
			
		||||
public:
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Gimpl) // inherits the types for the gauge fields
 | 
			
		||||
 | 
			
		||||
  virtual ~Smear(){}
 | 
			
		||||
  virtual void smear     (GaugeField&,const GaugeField&)const = 0;
 | 
			
		||||
  virtual void derivative(GaugeField&, const GaugeField&,const GaugeField&) const = 0;
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,213 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/modules/plaquette.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2017
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef WILSONFLOW_H
 | 
			
		||||
#define WILSONFLOW_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
class WilsonFlow: public Smear<Gimpl>{
 | 
			
		||||
    unsigned int Nstep;
 | 
			
		||||
    unsigned int measure_interval;
 | 
			
		||||
    mutable RealD epsilon, taus;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    mutable WilsonGaugeAction<Gimpl> SG;
 | 
			
		||||
 | 
			
		||||
    void evolve_step(typename Gimpl::GaugeField&) const;
 | 
			
		||||
    void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD);
 | 
			
		||||
    RealD tau(unsigned int t)const {return epsilon*(t+1.0); }
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
    INHERIT_GIMPL_TYPES(Gimpl)
 | 
			
		||||
 | 
			
		||||
    explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
 | 
			
		||||
        Nstep(Nstep),
 | 
			
		||||
        epsilon(epsilon),
 | 
			
		||||
        measure_interval(interval),
 | 
			
		||||
        SG(WilsonGaugeAction<Gimpl>(3.0)) {
 | 
			
		||||
            // WilsonGaugeAction with beta 3.0
 | 
			
		||||
            assert(epsilon > 0.0);
 | 
			
		||||
            LogMessage();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void LogMessage() {
 | 
			
		||||
        std::cout << GridLogMessage
 | 
			
		||||
            << "[WilsonFlow] Nstep   : " << Nstep << std::endl;
 | 
			
		||||
        std::cout << GridLogMessage
 | 
			
		||||
            << "[WilsonFlow] epsilon : " << epsilon << std::endl;
 | 
			
		||||
        std::cout << GridLogMessage
 | 
			
		||||
            << "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    virtual void smear(GaugeField&, const GaugeField&) const;
 | 
			
		||||
 | 
			
		||||
    virtual void derivative(GaugeField&, const GaugeField&, const GaugeField&) const {
 | 
			
		||||
        assert(0);
 | 
			
		||||
        // undefined for WilsonFlow
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau);
 | 
			
		||||
    RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const;
 | 
			
		||||
    RealD energyDensityPlaquette(const GaugeField& U) const;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Implementations
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
 | 
			
		||||
    GaugeField Z(U._grid);
 | 
			
		||||
    GaugeField tmp(U._grid);
 | 
			
		||||
    SG.deriv(U, Z);
 | 
			
		||||
    Z *= 0.25;                                  // Z0 = 1/4 * F(U)
 | 
			
		||||
    Gimpl::update_field(Z, U, -2.0*epsilon);    // U = W1 = exp(ep*Z0)*W0
 | 
			
		||||
 | 
			
		||||
    Z *= -17.0/8.0;
 | 
			
		||||
    SG.deriv(U, tmp); Z += tmp;                 // -17/32*Z0 +Z1
 | 
			
		||||
    Z *= 8.0/9.0;                               // Z = -17/36*Z0 +8/9*Z1
 | 
			
		||||
    Gimpl::update_field(Z, U, -2.0*epsilon);    // U_= W2 = exp(ep*Z)*W1
 | 
			
		||||
 | 
			
		||||
    Z *= -4.0/3.0;
 | 
			
		||||
    SG.deriv(U, tmp); Z += tmp;                 // 4/3*(17/36*Z0 -8/9*Z1) +Z2
 | 
			
		||||
    Z *= 3.0/4.0;                               // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
 | 
			
		||||
    Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) {
 | 
			
		||||
    if (maxTau - taus < epsilon){
 | 
			
		||||
        epsilon = maxTau-taus;
 | 
			
		||||
    }
 | 
			
		||||
    //std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
 | 
			
		||||
    GaugeField Z(U._grid);
 | 
			
		||||
    GaugeField Zprime(U._grid);
 | 
			
		||||
    GaugeField tmp(U._grid), Uprime(U._grid);
 | 
			
		||||
    Uprime = U;
 | 
			
		||||
    SG.deriv(U, Z);
 | 
			
		||||
    Zprime = -Z;
 | 
			
		||||
    Z *= 0.25;                                  // Z0 = 1/4 * F(U)
 | 
			
		||||
    Gimpl::update_field(Z, U, -2.0*epsilon);    // U = W1 = exp(ep*Z0)*W0
 | 
			
		||||
 | 
			
		||||
    Z *= -17.0/8.0;
 | 
			
		||||
    SG.deriv(U, tmp); Z += tmp;                 // -17/32*Z0 +Z1
 | 
			
		||||
    Zprime += 2.0*tmp;
 | 
			
		||||
    Z *= 8.0/9.0;                               // Z = -17/36*Z0 +8/9*Z1
 | 
			
		||||
    Gimpl::update_field(Z, U, -2.0*epsilon);    // U_= W2 = exp(ep*Z)*W1
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    Z *= -4.0/3.0;
 | 
			
		||||
    SG.deriv(U, tmp); Z += tmp;                 // 4/3*(17/36*Z0 -8/9*Z1) +Z2
 | 
			
		||||
    Z *= 3.0/4.0;                               // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
 | 
			
		||||
    Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2
 | 
			
		||||
 | 
			
		||||
    // Ramos 
 | 
			
		||||
    Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
 | 
			
		||||
    // Compute distance as norm^2 of the difference
 | 
			
		||||
    GaugeField diffU = U - Uprime;
 | 
			
		||||
    RealD diff = norm2(diffU);
 | 
			
		||||
    // adjust integration step
 | 
			
		||||
    
 | 
			
		||||
    taus += epsilon;
 | 
			
		||||
    //std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
 | 
			
		||||
    
 | 
			
		||||
    epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
 | 
			
		||||
    //std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const {
 | 
			
		||||
    RealD td = tau(step);
 | 
			
		||||
    return 2.0 * td * td * SG.S(U)/U._grid->gSites();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const {
 | 
			
		||||
    return 2.0 * taus * taus * SG.S(U)/U._grid->gSites();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//#define WF_TIMING 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
 | 
			
		||||
    out = in;
 | 
			
		||||
    for (unsigned int step = 1; step <= Nstep; step++) {
 | 
			
		||||
        auto start = std::chrono::high_resolution_clock::now();
 | 
			
		||||
        evolve_step(out);
 | 
			
		||||
        auto end = std::chrono::high_resolution_clock::now();
 | 
			
		||||
        std::chrono::duration<double> diff = end - start;
 | 
			
		||||
        #ifdef WF_TIMING
 | 
			
		||||
        std::cout << "Time to evolve " << diff.count() << " s\n";
 | 
			
		||||
        #endif
 | 
			
		||||
        std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
 | 
			
		||||
		  << step << "  " << tau(step) << "  " 
 | 
			
		||||
		  << energyDensityPlaquette(step,out) << std::endl;
 | 
			
		||||
         if( step % measure_interval == 0){
 | 
			
		||||
         std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : "
 | 
			
		||||
            << step << "  " 
 | 
			
		||||
            << WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Gimpl>
 | 
			
		||||
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
 | 
			
		||||
    out = in;
 | 
			
		||||
    taus = epsilon;
 | 
			
		||||
    unsigned int step = 0;
 | 
			
		||||
    do{
 | 
			
		||||
        step++;
 | 
			
		||||
        //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
 | 
			
		||||
        evolve_step_adaptive(out, maxTau);
 | 
			
		||||
        std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
 | 
			
		||||
		  << step << "  " << taus << "  "
 | 
			
		||||
		  << energyDensityPlaquette(out) << std::endl;
 | 
			
		||||
         if( step % measure_interval == 0){
 | 
			
		||||
         std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : "
 | 
			
		||||
            << step << "  " 
 | 
			
		||||
            << WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
 | 
			
		||||
        }
 | 
			
		||||
    } while (taus < maxTau);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}  // namespace QCD
 | 
			
		||||
}  // namespace Grid
 | 
			
		||||
 | 
			
		||||
#endif   // WILSONFLOW_H
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -1,197 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/action/scalar/CovariantLaplacian.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#ifndef COVARIANT_LAPLACIAN_H
 | 
			
		||||
#define COVARIANT_LAPLACIAN_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
struct LaplacianParams : Serializable {
 | 
			
		||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(LaplacianParams, 
 | 
			
		||||
                                  RealD, lo, 
 | 
			
		||||
                                  RealD, hi, 
 | 
			
		||||
                                  int,   MaxIter, 
 | 
			
		||||
                                  RealD, tolerance, 
 | 
			
		||||
                                  int,   degree, 
 | 
			
		||||
                                  int,   precision);
 | 
			
		||||
  
 | 
			
		||||
  // constructor 
 | 
			
		||||
  LaplacianParams(RealD lo      = 0.0, 
 | 
			
		||||
                  RealD hi      = 1.0, 
 | 
			
		||||
                  int maxit     = 1000,
 | 
			
		||||
                  RealD tol     = 1.0e-8, 
 | 
			
		||||
                  int degree    = 10,
 | 
			
		||||
                  int precision = 64)
 | 
			
		||||
    : lo(lo),
 | 
			
		||||
      hi(hi),
 | 
			
		||||
      MaxIter(maxit),
 | 
			
		||||
      tolerance(tol),
 | 
			
		||||
      degree(degree),
 | 
			
		||||
      precision(precision){};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////
 | 
			
		||||
// Laplacian operator L on adjoint fields
 | 
			
		||||
//
 | 
			
		||||
// phi: adjoint field
 | 
			
		||||
// L: D_mu^dag D_mu
 | 
			
		||||
//
 | 
			
		||||
// L phi(x) = Sum_mu [ U_mu(x)phi(x+mu)U_mu(x)^dag + 
 | 
			
		||||
//                     U_mu(x-mu)^dag phi(x-mu)U_mu(x-mu)
 | 
			
		||||
//                     -2phi(x)]
 | 
			
		||||
//
 | 
			
		||||
// Operator designed to be encapsulated by
 | 
			
		||||
// an HermitianLinearOperator<.. , ..>
 | 
			
		||||
////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class LaplacianAdjointField: public Metric<typename Impl::Field> {
 | 
			
		||||
  OperatorFunction<typename Impl::Field> &Solver;
 | 
			
		||||
  LaplacianParams param;
 | 
			
		||||
  MultiShiftFunction PowerHalf;    
 | 
			
		||||
  MultiShiftFunction PowerInvHalf;    
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Impl);
 | 
			
		||||
 | 
			
		||||
  LaplacianAdjointField(GridBase* grid, OperatorFunction<GaugeField>& S, LaplacianParams& p, const RealD k = 1.0)
 | 
			
		||||
      : U(Nd, grid), Solver(S), param(p), kappa(k){
 | 
			
		||||
        AlgRemez remez(param.lo,param.hi,param.precision);
 | 
			
		||||
        std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/2)"<<std::endl;
 | 
			
		||||
        remez.generateApprox(param.degree,1,2);
 | 
			
		||||
        PowerHalf.Init(remez,param.tolerance,false);
 | 
			
		||||
        PowerInvHalf.Init(remez,param.tolerance,true);
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
  void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);}
 | 
			
		||||
  void Mdiag(const GaugeField&, GaugeField&){ assert(0);}
 | 
			
		||||
 | 
			
		||||
  void ImportGauge(const GaugeField& _U) {
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      U[mu] = PeekIndex<LorentzIndex>(_U, mu);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void M(const GaugeField& in, GaugeField& out) {
 | 
			
		||||
    // in is an antihermitian matrix
 | 
			
		||||
    // test
 | 
			
		||||
    //GaugeField herm = in + adj(in);
 | 
			
		||||
    //std::cout << "AHermiticity: " << norm2(herm) << std::endl;
 | 
			
		||||
 | 
			
		||||
    GaugeLinkField tmp(in._grid);
 | 
			
		||||
    GaugeLinkField tmp2(in._grid);
 | 
			
		||||
    GaugeLinkField sum(in._grid);
 | 
			
		||||
 | 
			
		||||
    for (int nu = 0; nu < Nd; nu++) {
 | 
			
		||||
      sum = zero;
 | 
			
		||||
      GaugeLinkField in_nu = PeekIndex<LorentzIndex>(in, nu);
 | 
			
		||||
      GaugeLinkField out_nu(out._grid);
 | 
			
		||||
      for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
        tmp = U[mu] * Cshift(in_nu, mu, +1) * adj(U[mu]);
 | 
			
		||||
        tmp2 = adj(U[mu]) * in_nu * U[mu];
 | 
			
		||||
        sum += tmp + Cshift(tmp2, mu, -1) - 2.0 * in_nu;
 | 
			
		||||
      }
 | 
			
		||||
      out_nu = (1.0 - kappa) * in_nu - kappa / (double(4 * Nd)) * sum;
 | 
			
		||||
      PokeIndex<LorentzIndex>(out, out_nu, nu);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void MDeriv(const GaugeField& in, GaugeField& der) {
 | 
			
		||||
    // in is anti-hermitian
 | 
			
		||||
    RealD factor = -kappa / (double(4 * Nd));
 | 
			
		||||
    
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++){
 | 
			
		||||
      GaugeLinkField der_mu(der._grid);
 | 
			
		||||
      der_mu = zero;
 | 
			
		||||
      for (int nu = 0; nu < Nd; nu++){
 | 
			
		||||
        GaugeLinkField in_nu = PeekIndex<LorentzIndex>(in, nu);
 | 
			
		||||
        der_mu += U[mu] * Cshift(in_nu, mu, 1) * adj(U[mu]) * in_nu;
 | 
			
		||||
      }
 | 
			
		||||
      // the minus sign comes by using the in_nu instead of the
 | 
			
		||||
      // adjoint in the last multiplication
 | 
			
		||||
      PokeIndex<LorentzIndex>(der,  -2.0 * factor * der_mu, mu);
 | 
			
		||||
    } 
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // separating this temporarily
 | 
			
		||||
  void MDeriv(const GaugeField& left, const GaugeField& right,
 | 
			
		||||
              GaugeField& der) {
 | 
			
		||||
    // in is anti-hermitian
 | 
			
		||||
    RealD factor = -kappa / (double(4 * Nd));
 | 
			
		||||
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      GaugeLinkField der_mu(der._grid);
 | 
			
		||||
      der_mu = zero;
 | 
			
		||||
      for (int nu = 0; nu < Nd; nu++) {
 | 
			
		||||
        GaugeLinkField left_nu = PeekIndex<LorentzIndex>(left, nu);
 | 
			
		||||
        GaugeLinkField right_nu = PeekIndex<LorentzIndex>(right, nu);
 | 
			
		||||
        der_mu += U[mu] * Cshift(left_nu, mu, 1) * adj(U[mu]) * right_nu;
 | 
			
		||||
        der_mu += U[mu] * Cshift(right_nu, mu, 1) * adj(U[mu]) * left_nu;
 | 
			
		||||
      }
 | 
			
		||||
      PokeIndex<LorentzIndex>(der, -factor * der_mu, mu);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void Minv(const GaugeField& in, GaugeField& inverted){
 | 
			
		||||
    HermitianLinearOperator<LaplacianAdjointField<Impl>,GaugeField> HermOp(*this);
 | 
			
		||||
    Solver(HermOp, in, inverted);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void MSquareRoot(GaugeField& P){
 | 
			
		||||
    GaugeField Gp(P._grid);
 | 
			
		||||
    HermitianLinearOperator<LaplacianAdjointField<Impl>,GaugeField> HermOp(*this);
 | 
			
		||||
    ConjugateGradientMultiShift<GaugeField> msCG(param.MaxIter,PowerHalf);
 | 
			
		||||
    msCG(HermOp,P,Gp);
 | 
			
		||||
    P = Gp; 
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void MInvSquareRoot(GaugeField& P){
 | 
			
		||||
    GaugeField Gp(P._grid);
 | 
			
		||||
    HermitianLinearOperator<LaplacianAdjointField<Impl>,GaugeField> HermOp(*this);
 | 
			
		||||
    ConjugateGradientMultiShift<GaugeField> msCG(param.MaxIter,PowerInvHalf);
 | 
			
		||||
    msCG(HermOp,P,Gp);
 | 
			
		||||
    P = Gp; 
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  RealD kappa;
 | 
			
		||||
  std::vector<GaugeLinkField> U;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,193 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    grid` physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
//#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_QCD_GAUGE_FIX_H
 | 
			
		||||
#define GRID_QCD_GAUGE_FIX_H
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace QCD {
 | 
			
		||||
 | 
			
		||||
template <class Gimpl> 
 | 
			
		||||
class FourierAcceleratedGaugeFixer  : public Gimpl {
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
 | 
			
		||||
  typedef typename Gimpl::GaugeLinkField GaugeMat;
 | 
			
		||||
  typedef typename Gimpl::GaugeField GaugeLorentz;
 | 
			
		||||
 | 
			
		||||
  static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) {
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
      Complex cmi(0.0,-1.0);
 | 
			
		||||
      A[mu] = Ta(U[mu]) * cmi;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu) {
 | 
			
		||||
    dmuAmu=zero;
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
      dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
 | 
			
		||||
    }
 | 
			
		||||
  }  
 | 
			
		||||
  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false) {
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
    Real org_plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
 | 
			
		||||
    Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
 | 
			
		||||
    Real old_trace = org_link_trace;
 | 
			
		||||
    Real trG;
 | 
			
		||||
 | 
			
		||||
    std::vector<GaugeMat> U(Nd,grid);
 | 
			
		||||
                 GaugeMat dmuAmu(grid);
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<maxiter;i++){
 | 
			
		||||
      for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu);
 | 
			
		||||
      if ( Fourier==false ) { 
 | 
			
		||||
	trG = SteepestDescentStep(U,alpha,dmuAmu);
 | 
			
		||||
      } else { 
 | 
			
		||||
	trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu);
 | 
			
		||||
      }
 | 
			
		||||
      for(int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(Umu,U[mu],mu);
 | 
			
		||||
      // Monitor progress and convergence test 
 | 
			
		||||
      // infrequently to minimise cost overhead
 | 
			
		||||
      if ( i %20 == 0 ) { 
 | 
			
		||||
	Real plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
 | 
			
		||||
	Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
 | 
			
		||||
 | 
			
		||||
	if (Fourier) 
 | 
			
		||||
	  std::cout << GridLogMessage << "Fourier Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl;
 | 
			
		||||
	else 
 | 
			
		||||
	  std::cout << GridLogMessage << " Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl;
 | 
			
		||||
	
 | 
			
		||||
	Real Phi  = 1.0 - old_trace / link_trace ;
 | 
			
		||||
	Real Omega= 1.0 - trG;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl;
 | 
			
		||||
	if ( (Omega < Omega_tol) && ( ::fabs(Phi) < Phi_tol) ) {
 | 
			
		||||
	  std::cout << GridLogMessage << "Converged ! "<<std::endl;
 | 
			
		||||
	  return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	old_trace = link_trace;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
 | 
			
		||||
    GridBase *grid = U[0]._grid;
 | 
			
		||||
 | 
			
		||||
    std::vector<GaugeMat> A(Nd,grid);
 | 
			
		||||
    GaugeMat g(grid);
 | 
			
		||||
 | 
			
		||||
    GaugeLinkToLieAlgebraField(U,A);
 | 
			
		||||
    ExpiAlphaDmuAmu(A,g,alpha,dmuAmu);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    Real vol = grid->gSites();
 | 
			
		||||
    Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
 | 
			
		||||
 | 
			
		||||
    SU<Nc>::GaugeTransform(U,g);
 | 
			
		||||
 | 
			
		||||
    return trG;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) {
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = U[0]._grid;
 | 
			
		||||
 | 
			
		||||
    Real vol = grid->gSites();
 | 
			
		||||
 | 
			
		||||
    FFT theFFT((GridCartesian *)grid);
 | 
			
		||||
 | 
			
		||||
    LatticeComplex  Fp(grid);
 | 
			
		||||
    LatticeComplex  psq(grid); psq=zero;
 | 
			
		||||
    LatticeComplex  pmu(grid); 
 | 
			
		||||
    LatticeComplex   one(grid); one = Complex(1.0,0.0);
 | 
			
		||||
 | 
			
		||||
    GaugeMat g(grid);
 | 
			
		||||
    GaugeMat dmuAmu_p(grid);
 | 
			
		||||
    std::vector<GaugeMat> A(Nd,grid);
 | 
			
		||||
 | 
			
		||||
    GaugeLinkToLieAlgebraField(U,A);
 | 
			
		||||
 | 
			
		||||
    DmuAmu(A,dmuAmu);
 | 
			
		||||
 | 
			
		||||
    theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward);
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    // Work out Fp = psq_max/ psq...
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    std::vector<int> latt_size = grid->GlobalDimensions();
 | 
			
		||||
    std::vector<int> coor(grid->_ndimension,0);
 | 
			
		||||
    for(int mu=0;mu<Nd;mu++) {
 | 
			
		||||
 | 
			
		||||
      Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
      LatticeCoordinate(pmu,mu);
 | 
			
		||||
      pmu = TwoPiL * pmu ;
 | 
			
		||||
      psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5); 
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Complex psqMax(16.0);
 | 
			
		||||
    Fp =  psqMax*one/psq;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    static int once;
 | 
			
		||||
    if ( once == 0 ) { 
 | 
			
		||||
      std::cout << " Fp " << Fp <<std::endl;
 | 
			
		||||
      once ++;
 | 
			
		||||
      }*/
 | 
			
		||||
 | 
			
		||||
    pokeSite(TComplex(1.0),Fp,coor);
 | 
			
		||||
 | 
			
		||||
    dmuAmu_p  = dmuAmu_p * Fp; 
 | 
			
		||||
 | 
			
		||||
    theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward);
 | 
			
		||||
 | 
			
		||||
    GaugeMat ciadmam(grid);
 | 
			
		||||
    Complex cialpha(0.0,-alpha);
 | 
			
		||||
    ciadmam = dmuAmu*cialpha;
 | 
			
		||||
    SU<Nc>::taExp(ciadmam,g);
 | 
			
		||||
 | 
			
		||||
    Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc;
 | 
			
		||||
 | 
			
		||||
    SU<Nc>::GaugeTransform(U,g);
 | 
			
		||||
 | 
			
		||||
    return trG;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) {
 | 
			
		||||
    GridBase *grid = g._grid;
 | 
			
		||||
    Complex cialpha(0.0,-alpha);
 | 
			
		||||
    GaugeMat ciadmam(grid);
 | 
			
		||||
    DmuAmu(A,dmuAmu);
 | 
			
		||||
    ciadmam = dmuAmu*cialpha;
 | 
			
		||||
    SU<Nc>::taExp(ciadmam,g);
 | 
			
		||||
  }  
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,226 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/qcd/hmc/integrators/Integrator.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
//--------------------------------------------------------------------
 | 
			
		||||
#ifndef METRIC_H
 | 
			
		||||
#define METRIC_H
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
namespace QCD{
 | 
			
		||||
 | 
			
		||||
template <typename Field> 
 | 
			
		||||
class Metric{
 | 
			
		||||
public:
 | 
			
		||||
  virtual void ImportGauge(const Field&)   = 0;
 | 
			
		||||
  virtual void M(const Field&, Field&)     = 0;
 | 
			
		||||
  virtual void Minv(const Field&, Field&)  = 0;
 | 
			
		||||
  virtual void MSquareRoot(Field&) = 0;
 | 
			
		||||
  virtual void MInvSquareRoot(Field&) = 0;
 | 
			
		||||
  virtual void MDeriv(const Field&, Field&) = 0;
 | 
			
		||||
  virtual void MDeriv(const Field&, const Field&, Field&) = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Need a trivial operator
 | 
			
		||||
template <typename Field>
 | 
			
		||||
class TrivialMetric : public Metric<Field>{
 | 
			
		||||
public:
 | 
			
		||||
  virtual void ImportGauge(const Field&){};
 | 
			
		||||
  virtual void M(const Field& in, Field& out){
 | 
			
		||||
    out = in;
 | 
			
		||||
  }
 | 
			
		||||
  virtual void Minv(const Field& in, Field& out){
 | 
			
		||||
    out = in;
 | 
			
		||||
  }
 | 
			
		||||
  virtual void MSquareRoot(Field& P){
 | 
			
		||||
    // do nothing
 | 
			
		||||
  }
 | 
			
		||||
  virtual void MInvSquareRoot(Field& P){
 | 
			
		||||
    // do nothing
 | 
			
		||||
  }
 | 
			
		||||
  virtual void MDeriv(const Field& in, Field& out){
 | 
			
		||||
    out = zero;
 | 
			
		||||
  }
 | 
			
		||||
  virtual void MDeriv(const Field& left, const Field& right, Field& out){
 | 
			
		||||
    out = zero;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
///////////////////////////////
 | 
			
		||||
// Generalised momenta
 | 
			
		||||
///////////////////////////////
 | 
			
		||||
 | 
			
		||||
template <typename Implementation>
 | 
			
		||||
class GeneralisedMomenta{
 | 
			
		||||
public:
 | 
			
		||||
  typedef typename Implementation::Field MomentaField;  //for readability
 | 
			
		||||
  typedef typename Implementation::GaugeLinkField MomentaLinkField;  //for readability
 | 
			
		||||
  Metric<MomentaField>& M;
 | 
			
		||||
  MomentaField Mom;
 | 
			
		||||
 | 
			
		||||
  // Auxiliary fields
 | 
			
		||||
  // not hard coded but inherit the type from the metric
 | 
			
		||||
  // created Nd new fields
 | 
			
		||||
  // hide these in the metric?
 | 
			
		||||
  //typedef Lattice<iVector<iScalar<iMatrix<vComplex, Nc> >, Nd/2 > > AuxiliaryMomentaType;
 | 
			
		||||
  MomentaField AuxMom;
 | 
			
		||||
  MomentaField AuxField;
 | 
			
		||||
 | 
			
		||||
  GeneralisedMomenta(GridBase* grid, Metric<MomentaField>& M): M(M), Mom(grid), AuxMom(grid), AuxField(grid){}
 | 
			
		||||
 | 
			
		||||
  // Correct
 | 
			
		||||
  void MomentaDistribution(GridParallelRNG& pRNG){
 | 
			
		||||
    // Generate a distribution for
 | 
			
		||||
    // P^dag G P
 | 
			
		||||
    // where G = M^-1
 | 
			
		||||
 | 
			
		||||
    // Generate gaussian momenta
 | 
			
		||||
    Implementation::generate_momenta(Mom, pRNG);
 | 
			
		||||
    // Modify the distribution with the metric
 | 
			
		||||
    M.MSquareRoot(Mom);
 | 
			
		||||
 | 
			
		||||
    if (1) {
 | 
			
		||||
      // Auxiliary momenta
 | 
			
		||||
      // do nothing if trivial, so hide in the metric
 | 
			
		||||
      MomentaField AuxMomTemp(Mom._grid);
 | 
			
		||||
      Implementation::generate_momenta(AuxMom, pRNG);
 | 
			
		||||
      Implementation::generate_momenta(AuxField, pRNG);
 | 
			
		||||
      // Modify the distribution with the metric
 | 
			
		||||
      // Aux^dag M Aux
 | 
			
		||||
      M.MInvSquareRoot(AuxMom);  // AuxMom = M^{-1/2} AuxMomTemp
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Correct
 | 
			
		||||
  RealD MomentaAction(){
 | 
			
		||||
    MomentaField inv(Mom._grid);
 | 
			
		||||
    inv = zero;
 | 
			
		||||
    M.Minv(Mom, inv);
 | 
			
		||||
    LatticeComplex Hloc(Mom._grid);
 | 
			
		||||
    Hloc = zero;
 | 
			
		||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
      // This is not very general
 | 
			
		||||
      // hide in the metric
 | 
			
		||||
      auto Mom_mu = PeekIndex<LorentzIndex>(Mom, mu);
 | 
			
		||||
      auto inv_mu = PeekIndex<LorentzIndex>(inv, mu);
 | 
			
		||||
      Hloc += trace(Mom_mu * inv_mu);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (1) {
 | 
			
		||||
      // Auxiliary Fields
 | 
			
		||||
      // hide in the metric
 | 
			
		||||
      M.M(AuxMom, inv);
 | 
			
		||||
      for (int mu = 0; mu < Nd; mu++) {
 | 
			
		||||
        // This is not very general
 | 
			
		||||
        // hide in the operators
 | 
			
		||||
        auto inv_mu = PeekIndex<LorentzIndex>(inv, mu);
 | 
			
		||||
        auto am_mu = PeekIndex<LorentzIndex>(AuxMom, mu);
 | 
			
		||||
        auto af_mu = PeekIndex<LorentzIndex>(AuxField, mu);
 | 
			
		||||
        Hloc += trace(am_mu * inv_mu);// p M p
 | 
			
		||||
        Hloc += trace(af_mu * af_mu);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Complex Hsum = sum(Hloc);
 | 
			
		||||
    return Hsum.real();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Correct
 | 
			
		||||
  void DerivativeU(MomentaField& in, MomentaField& der){
 | 
			
		||||
 | 
			
		||||
    // Compute the derivative of the kinetic term
 | 
			
		||||
    // with respect to the gauge field
 | 
			
		||||
    MomentaField MDer(in._grid);
 | 
			
		||||
    MomentaField X(in._grid);
 | 
			
		||||
    X = zero;
 | 
			
		||||
    M.Minv(in, X);  // X = G in
 | 
			
		||||
    M.MDeriv(X, MDer);  // MDer = U * dS/dU
 | 
			
		||||
    der = Implementation::projectForce(MDer);  // Ta if gauge fields
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void AuxiliaryFieldsDerivative(MomentaField& der){
 | 
			
		||||
    der = zero;
 | 
			
		||||
    if (1){
 | 
			
		||||
    // Auxiliary fields
 | 
			
		||||
    MomentaField der_temp(der._grid);
 | 
			
		||||
    MomentaField X(der._grid);
 | 
			
		||||
    X=zero;
 | 
			
		||||
    //M.M(AuxMom, X); // X = M Aux
 | 
			
		||||
    // Two derivative terms
 | 
			
		||||
    // the Mderiv need separation of left and right terms
 | 
			
		||||
    M.MDeriv(AuxMom, der); 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // this one should not be necessary (identical to the previous one)
 | 
			
		||||
    //M.MDeriv(X, AuxMom, der_temp); der += der_temp;
 | 
			
		||||
 | 
			
		||||
    der = -1.0*Implementation::projectForce(der);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void DerivativeP(MomentaField& der){
 | 
			
		||||
    der = zero;
 | 
			
		||||
    M.Minv(Mom, der);
 | 
			
		||||
    // is the projection necessary here?
 | 
			
		||||
    // no for fields in the algebra
 | 
			
		||||
    der = Implementation::projectForce(der); 
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void update_auxiliary_momenta(RealD ep){
 | 
			
		||||
    if(1){
 | 
			
		||||
      AuxMom -= ep * AuxField;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void update_auxiliary_fields(RealD ep){
 | 
			
		||||
    if (1) {
 | 
			
		||||
      MomentaField tmp(AuxMom._grid);
 | 
			
		||||
      MomentaField tmp2(AuxMom._grid);
 | 
			
		||||
      M.M(AuxMom, tmp);
 | 
			
		||||
      // M.M(tmp, tmp2);
 | 
			
		||||
      AuxField += ep * tmp;  // M^2 AuxMom
 | 
			
		||||
      // factor of 2?
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif //METRIC_H
 | 
			
		||||
@@ -1,96 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/utils/WilsonLoops.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef SCALAR_OBJS_H
 | 
			
		||||
#define SCALAR_OBJS_H
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  // FIXME drop the QCD namespace in Nd
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
// Scalar field obs
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class ScalarObs {
 | 
			
		||||
 public:
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // squared field
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  static void phisquared(typename Impl::Field &fsq,
 | 
			
		||||
                         const typename Impl::Field &f) {
 | 
			
		||||
    fsq = f * f;
 | 
			
		||||
  }
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // phi^4 interaction term
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  static void phifourth(typename Impl::Field &fsq,
 | 
			
		||||
                        const typename Impl::Field &f) {
 | 
			
		||||
    fsq = f * f * f * f;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // phi(x)phi(x+mu)
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  static void phider(typename Impl::Field &fsq,
 | 
			
		||||
                     const typename Impl::Field &f) {
 | 
			
		||||
    fsq = Cshift(f, 0, -1) * f;
 | 
			
		||||
    for (int mu = 1; mu < QCD::Nd; mu++) fsq += Cshift(f, mu, -1) * f;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
  // Vol sum of the previous obs.
 | 
			
		||||
  //////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  static RealD sumphider(const typename Impl::Field &f) {
 | 
			
		||||
    typename Impl::Field tmp(f._grid);
 | 
			
		||||
    tmp = Cshift(f, 0, -1) * f;
 | 
			
		||||
    for (int mu = 1; mu < QCD::Nd; mu++) {
 | 
			
		||||
      tmp += Cshift(f, mu, -1) * f;
 | 
			
		||||
    }
 | 
			
		||||
    return -sum(trace(tmp));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static RealD sumphisquared(const typename Impl::Field &f) {
 | 
			
		||||
    typename Impl::Field tmp(f._grid);
 | 
			
		||||
    tmp = f * f;
 | 
			
		||||
    return sum(trace(tmp));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static RealD sumphifourth(const typename Impl::Field &f) {
 | 
			
		||||
    typename Impl::Field tmp(f._grid);
 | 
			
		||||
    phifourth(tmp, f);
 | 
			
		||||
    return sum(trace(tmp));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,173 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/serialisation/JSON_IO.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
    Author: Guido Cossu<guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 | 
			
		||||
// Writer implementation ///////////////////////////////////////////////////////
 | 
			
		||||
JSONWriter::JSONWriter(const std::string &fileName)
 | 
			
		||||
: fileName_(fileName), ss_("{ ", std::ostringstream::ate){}
 | 
			
		||||
 | 
			
		||||
JSONWriter::~JSONWriter(void)
 | 
			
		||||
{
 | 
			
		||||
  // close
 | 
			
		||||
  delete_comma();
 | 
			
		||||
  ss_ << "}";  
 | 
			
		||||
 | 
			
		||||
  // write prettified JSON to file
 | 
			
		||||
  std::ofstream os(fileName_);
 | 
			
		||||
  //std::cout << "JSONWriter::~JSONWriter" << std::endl;
 | 
			
		||||
  os << std::setw(2) << json::parse(ss_.str()) << std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void JSONWriter::push(const std::string &s)
 | 
			
		||||
{
 | 
			
		||||
  // adding a nested object
 | 
			
		||||
  if (s.size())
 | 
			
		||||
    ss_ << " \""<<s<<"\" : {";
 | 
			
		||||
  else
 | 
			
		||||
    ss_ << " {";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void JSONWriter::pop(void)
 | 
			
		||||
{
 | 
			
		||||
  //std::cout << "JSONWriter::pop" << std::endl;
 | 
			
		||||
  delete_comma();
 | 
			
		||||
  ss_ << "},";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void JSONWriter::delete_comma()
 | 
			
		||||
{
 | 
			
		||||
  std::string dlast = ss_.str();
 | 
			
		||||
  dlast.pop_back(); // deletes the last comma
 | 
			
		||||
  ss_.str(dlast);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// here we are hitting a g++ bug (Bug 56480)
 | 
			
		||||
// compiles fine with clang
 | 
			
		||||
// have to wrap in the Grid namespace
 | 
			
		||||
// annoying, but necessary for TravisCI
 | 
			
		||||
namespace Grid
 | 
			
		||||
{
 | 
			
		||||
  void JSONWriter::writeDefault(const std::string &s,	const std::string &x)
 | 
			
		||||
  {
 | 
			
		||||
    //std::cout << "JSONWriter::writeDefault(string) : " << s <<  std::endl;
 | 
			
		||||
    std::ostringstream os;
 | 
			
		||||
    os << std::boolalpha << x;
 | 
			
		||||
    if (s.size())
 | 
			
		||||
      ss_ << "\""<< s << "\" : \"" << os.str() << "\" ," ;
 | 
			
		||||
    else
 | 
			
		||||
     ss_ << os.str() << " ," ;
 | 
			
		||||
  }
 | 
			
		||||
}// namespace Grid 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Reader implementation ///////////////////////////////////////////////////////
 | 
			
		||||
JSONReader::JSONReader(const std::string &fileName)
 | 
			
		||||
: fileName_(fileName)
 | 
			
		||||
{
 | 
			
		||||
  std::ifstream file(fileName_);
 | 
			
		||||
  file >> jobject_;
 | 
			
		||||
 | 
			
		||||
  // test
 | 
			
		||||
  // serialize to standard output
 | 
			
		||||
  //std::cout << "JSONReader::JSONReader : " << jobject_ << endl; 
 | 
			
		||||
  jcur_ = jobject_;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool JSONReader::push(const std::string &s)
 | 
			
		||||
{
 | 
			
		||||
  if (s.size()){
 | 
			
		||||
    jold_.push_back(jcur_);
 | 
			
		||||
    do_pop.push_back(true);
 | 
			
		||||
    try
 | 
			
		||||
    {
 | 
			
		||||
      jcur_ = jcur_[s]; 
 | 
			
		||||
    }
 | 
			
		||||
    catch (std::out_of_range& e)
 | 
			
		||||
    {
 | 
			
		||||
      std::cout << "out of range: " << e.what() << '\n';
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
    //cout << "JSONReader::push : " << s << " : "<< jcur_ << endl;
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    do_pop.push_back(false);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void JSONReader::pop(void)
 | 
			
		||||
{
 | 
			
		||||
  if (do_pop.back()){
 | 
			
		||||
    jcur_ = jold_.back();
 | 
			
		||||
    jold_.pop_back();
 | 
			
		||||
    do_pop.pop_back();
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
    do_pop.pop_back();
 | 
			
		||||
 | 
			
		||||
  //cout << "JSONReader::pop : " << jcur_ << endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool JSONReader::nextElement(const std::string &s)
 | 
			
		||||
{
 | 
			
		||||
  // Work in progress
 | 
			
		||||
  // JSON dictionaries do not support multiple names 
 | 
			
		||||
  // Same name objects must be packed in vectors
 | 
			
		||||
  ++it_;
 | 
			
		||||
  
 | 
			
		||||
  //if (it_ == it_end_){
 | 
			
		||||
  //  return false;
 | 
			
		||||
  //}
 | 
			
		||||
 | 
			
		||||
  jcur_ = *it_; 
 | 
			
		||||
  //cout << "JSONReader::nextElement(string) : " << s << " : "<< jcur_ << endl;
 | 
			
		||||
  //return true;
 | 
			
		||||
 | 
			
		||||
    return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
void JSONReader::readDefault(const std::string &s, std::string &output)
 | 
			
		||||
{
 | 
			
		||||
  //cout << "JSONReader::readDefault(string) : " << s<< " " << jcur_ << endl;
 | 
			
		||||
  if (s.size()){
 | 
			
		||||
    //std::cout << "String: "<< jcur_[s] << std::endl;
 | 
			
		||||
    output = jcur_[s];
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    //std::cout << "String: "<< jcur_ << std::endl;
 | 
			
		||||
    output = jcur_;    
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@@ -1,262 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/serialisation/JSON_IO.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
		Author: Guido Cossu<guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_SERIALISATION_JSON_IO_H
 | 
			
		||||
#define GRID_SERIALISATION_JSON_IO_H
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <iomanip>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <sstream>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <cassert>
 | 
			
		||||
 | 
			
		||||
#include <Grid/json/json.hpp>
 | 
			
		||||
 | 
			
		||||
// for convenience
 | 
			
		||||
using json = nlohmann::json;
 | 
			
		||||
 | 
			
		||||
namespace Grid
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  class JSONWriter: public Writer<JSONWriter>
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
  public:
 | 
			
		||||
    JSONWriter(const std::string &fileName);
 | 
			
		||||
    virtual ~JSONWriter(void);
 | 
			
		||||
    void push(const std::string &s);
 | 
			
		||||
    void pop(void);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void writeDefault(const std::string &s, const U &x);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void writeDefault(const std::string &s, const std::complex<U> &x);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void writeDefault(const std::string &s, const std::vector<U> &x);
 | 
			
		||||
    template <typename U, typename P>
 | 
			
		||||
    void writeDefault(const std::string &s, const std::pair<U,P> &x);
 | 
			
		||||
 | 
			
		||||
    template<std::size_t N>
 | 
			
		||||
    void writeDefault(const std::string &s, const char(&x)[N]);
 | 
			
		||||
 | 
			
		||||
    void writeDefault(const std::string &s, const std::string &x);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  private:
 | 
			
		||||
    void delete_comma();
 | 
			
		||||
    std::string         fileName_;
 | 
			
		||||
    std::ostringstream  ss_;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  class JSONReader: public Reader<JSONReader>
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    JSONReader(const std::string &fileName);
 | 
			
		||||
    virtual ~JSONReader(void) = default;
 | 
			
		||||
    bool push(const std::string &s);
 | 
			
		||||
    void pop(void);
 | 
			
		||||
    bool nextElement(const std::string &s);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void readDefault(const std::string &s, U &output);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void readDefault(const std::string &s, std::complex<U> &output);
 | 
			
		||||
    template <typename U>
 | 
			
		||||
    void readDefault(const std::string &s, std::vector<U> &output);
 | 
			
		||||
    template <typename U, typename P>
 | 
			
		||||
    void readDefault(const std::string &s, std::pair<U,P> &output);
 | 
			
		||||
  private:
 | 
			
		||||
    json                jobject_; // main object
 | 
			
		||||
    json                jcur_;  // current json object
 | 
			
		||||
    std::vector<json>   jold_;  // previous json object
 | 
			
		||||
    std::string         fileName_;
 | 
			
		||||
    std::vector<bool>   do_pop;
 | 
			
		||||
    json::iterator      it_;
 | 
			
		||||
    json::iterator      it_end_;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <>
 | 
			
		||||
  struct isReader< JSONReader > {
 | 
			
		||||
    static const bool value = true;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <>
 | 
			
		||||
  struct isWriter< JSONWriter > {
 | 
			
		||||
    static const bool value = true;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Writer template implementation ////////////////////////////////////////////
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  void JSONWriter::writeDefault(const std::string &s, const U &x)
 | 
			
		||||
  {
 | 
			
		||||
    //std::cout << "JSONWriter::writeDefault(U) : " << s <<  " " << x <<std::endl;
 | 
			
		||||
    std::ostringstream os;
 | 
			
		||||
    os << std::boolalpha << x;
 | 
			
		||||
    if (s.size())
 | 
			
		||||
      ss_ << "\""<< s << "\" : " << os.str() << " ," ;
 | 
			
		||||
    else
 | 
			
		||||
     ss_ << os.str() << " ," ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  void JSONWriter::writeDefault(const std::string &s, const std::complex<U> &x)
 | 
			
		||||
  {
 | 
			
		||||
    //std::cout << "JSONWriter::writeDefault(complex) : " << s <<  " " << x <<  std::endl;
 | 
			
		||||
    std::ostringstream os;
 | 
			
		||||
    os << "["<< std::boolalpha << x.real() << ", " << x.imag() << "]";
 | 
			
		||||
    if (s.size())
 | 
			
		||||
      ss_ << "\""<< s << "\" : " << os.str() << " ," ;
 | 
			
		||||
    else
 | 
			
		||||
     ss_ << os.str() << " ," ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename U, typename P>
 | 
			
		||||
  void JSONWriter::writeDefault(const std::string &s, const std::pair<U,P> &x)
 | 
			
		||||
  {
 | 
			
		||||
    //std::cout << "JSONWriter::writeDefault(pair) : " << s <<  " " << x <<  std::endl;
 | 
			
		||||
    std::ostringstream os;
 | 
			
		||||
    os << "["<< std::boolalpha << "\""<< x.first << "\" , \"" << x.second << "\" ]";
 | 
			
		||||
    if (s.size())
 | 
			
		||||
      ss_ << "\""<< s << "\" : " << os.str() << " ," ;
 | 
			
		||||
    else
 | 
			
		||||
     ss_ << os.str() << " ," ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  void JSONWriter::writeDefault(const std::string &s, const std::vector<U> &x)
 | 
			
		||||
  {
 | 
			
		||||
    //std::cout << "JSONWriter::writeDefault(vec U) : " << s <<  std::endl;
 | 
			
		||||
 | 
			
		||||
    if (s.size())
 | 
			
		||||
      ss_ << " \""<<s<<"\" : [";
 | 
			
		||||
    else
 | 
			
		||||
      ss_ << " [";
 | 
			
		||||
    for (auto &x_i: x)
 | 
			
		||||
    {
 | 
			
		||||
      write("", x_i);
 | 
			
		||||
    }
 | 
			
		||||
    delete_comma();
 | 
			
		||||
    ss_<< "],";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<std::size_t N>
 | 
			
		||||
  void JSONWriter::writeDefault(const std::string &s, const char(&x)[N]){
 | 
			
		||||
    //std::cout << "JSONWriter::writeDefault(char U) : " << s <<  "  " << x << std::endl;
 | 
			
		||||
 | 
			
		||||
    if (s.size())
 | 
			
		||||
      ss_ << "\""<< s << "\" : \"" << x << "\" ," ;
 | 
			
		||||
    else
 | 
			
		||||
      ss_ << "\"" << x << "\" ," ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Reader template implementation ////////////////////////////////////////////
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  void JSONReader::readDefault(const std::string &s, U &output)
 | 
			
		||||
  {
 | 
			
		||||
    //std::cout << "JSONReader::readDefault(U) : " << s << "  :  "<< jcur_ << std::endl;
 | 
			
		||||
 | 
			
		||||
    if (s.size()){
 | 
			
		||||
      //std::cout << "String: "<< jcur_[s] << std::endl;
 | 
			
		||||
      output = jcur_[s];
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      //std::cout << "String: "<< jcur_ << std::endl;
 | 
			
		||||
      output = jcur_;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Reader template implementation ////////////////////////////////////////////
 | 
			
		||||
  template <typename U, typename P>
 | 
			
		||||
  void JSONReader::readDefault(const std::string &s, std::pair<U,P> &output)
 | 
			
		||||
  {
 | 
			
		||||
    U first;
 | 
			
		||||
    P second;
 | 
			
		||||
    json j;
 | 
			
		||||
    if (s.size()){
 | 
			
		||||
      //std::cout << "JSONReader::readDefault(pair) : " << s << "  |  "<< jcur_[s] << std::endl;
 | 
			
		||||
      j = jcur_[s];
 | 
			
		||||
    } else {
 | 
			
		||||
      j = jcur_;
 | 
			
		||||
    }
 | 
			
		||||
    json::iterator it = j.begin();
 | 
			
		||||
    jcur_ = *it;
 | 
			
		||||
    read("", first);
 | 
			
		||||
    it++;
 | 
			
		||||
    jcur_ = *it;
 | 
			
		||||
    read("", second);
 | 
			
		||||
    output = std::pair<U,P>(first,second);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  void JSONReader::readDefault(const std::string &s, std::complex<U> &output)
 | 
			
		||||
  {
 | 
			
		||||
    U tmp1, tmp2;
 | 
			
		||||
    //std::cout << "JSONReader::readDefault(complex U) : " << s << "  :  "<< jcur_ << std::endl;
 | 
			
		||||
    json j = jcur_;
 | 
			
		||||
    json::iterator it = j.begin();
 | 
			
		||||
    jcur_ = *it;
 | 
			
		||||
    read("", tmp1);
 | 
			
		||||
    it++;
 | 
			
		||||
    jcur_ = *it;
 | 
			
		||||
    read("", tmp2);
 | 
			
		||||
    output = std::complex<U>(tmp1,tmp2);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  template <>
 | 
			
		||||
  void JSONReader::readDefault(const std::string &s, std::string &output);
 | 
			
		||||
 | 
			
		||||
  template <typename U>
 | 
			
		||||
  void JSONReader::readDefault(const std::string &s, std::vector<U> &output)
 | 
			
		||||
  {
 | 
			
		||||
    std::string    buf;
 | 
			
		||||
    unsigned int   i = 0;
 | 
			
		||||
    //std::cout << "JSONReader::readDefault(vec) : " << jcur_ << std::endl;
 | 
			
		||||
    if (s.size())
 | 
			
		||||
      push(s);
 | 
			
		||||
 | 
			
		||||
    json j = jcur_;
 | 
			
		||||
    for (json::iterator it = j.begin(); it != j.end(); ++it) {
 | 
			
		||||
      jcur_ = *it;
 | 
			
		||||
      //std::cout << "Value: " << it.value() << "\n";
 | 
			
		||||
      output.resize(i + 1);
 | 
			
		||||
      read("", output[i++]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    jcur_ = j;
 | 
			
		||||
    if (s.size())
 | 
			
		||||
      pop();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,439 +0,0 @@
 | 
			
		||||
#ifndef GRID_SERIALISATION_VECTORUTILS_H
 | 
			
		||||
#define GRID_SERIALISATION_VECTORUTILS_H
 | 
			
		||||
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
#include <Grid/tensors/Tensors.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
  // Pair IO utilities /////////////////////////////////////////////////////////
 | 
			
		||||
  // helper function to parse input in the format "<obj1 obj2>"
 | 
			
		||||
  template <typename T1, typename T2>
 | 
			
		||||
  inline std::istream & operator>>(std::istream &is, std::pair<T1, T2> &buf)
 | 
			
		||||
  {
 | 
			
		||||
    T1 buf1;
 | 
			
		||||
    T2 buf2;
 | 
			
		||||
    char c;
 | 
			
		||||
 | 
			
		||||
    // Search for "pair" delimiters.
 | 
			
		||||
    do
 | 
			
		||||
    {
 | 
			
		||||
      is.get(c);
 | 
			
		||||
    } while (c != '(' && !is.eof());
 | 
			
		||||
    if (c == '(')
 | 
			
		||||
    {
 | 
			
		||||
      int start = is.tellg();
 | 
			
		||||
      do
 | 
			
		||||
      {
 | 
			
		||||
        is.get(c);
 | 
			
		||||
      } while (c != ')' && !is.eof());
 | 
			
		||||
      if (c == ')')
 | 
			
		||||
      {
 | 
			
		||||
        int end = is.tellg();
 | 
			
		||||
        int psize = end - start - 1;
 | 
			
		||||
 | 
			
		||||
        // Only read data between pair limiters.
 | 
			
		||||
        is.seekg(start);
 | 
			
		||||
        std::string tmpstr(psize, ' ');
 | 
			
		||||
        is.read(&tmpstr[0], psize);
 | 
			
		||||
        std::istringstream temp(tmpstr);
 | 
			
		||||
        temp >> buf1 >> buf2;
 | 
			
		||||
        buf = std::make_pair(buf1, buf2);
 | 
			
		||||
        is.seekg(end);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    is.peek();
 | 
			
		||||
    return is;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // output to streams for pairs
 | 
			
		||||
  template <class T1, class T2>
 | 
			
		||||
  inline std::ostream & operator<<(std::ostream &os, const std::pair<T1, T2> &p)
 | 
			
		||||
  {
 | 
			
		||||
    os << "(" << p.first << " " << p.second << ")";
 | 
			
		||||
    return os;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // Grid scalar tensors to nested std::vectors //////////////////////////////////
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  struct TensorToVec
 | 
			
		||||
  {
 | 
			
		||||
    typedef T type;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  struct TensorToVec<iScalar<T>>
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename TensorToVec<T>::type type;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <typename T, int N>
 | 
			
		||||
  struct TensorToVec<iVector<T, N>>
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename std::vector<typename TensorToVec<T>::type> type;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <typename T, int N>
 | 
			
		||||
  struct TensorToVec<iMatrix<T, N>>
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename std::vector<std::vector<typename TensorToVec<T>::type>> type;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  void tensorDim(std::vector<size_t> &dim, const T &t, const bool wipe = true)
 | 
			
		||||
  {
 | 
			
		||||
    if (wipe)
 | 
			
		||||
    {
 | 
			
		||||
      dim.clear();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  void tensorDim(std::vector<size_t> &dim, const iScalar<T> &t, const bool wipe = true)
 | 
			
		||||
  {
 | 
			
		||||
    if (wipe)
 | 
			
		||||
    {
 | 
			
		||||
      dim.clear();
 | 
			
		||||
    }
 | 
			
		||||
    tensorDim(dim, t._internal, false);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T, int N>
 | 
			
		||||
  void tensorDim(std::vector<size_t> &dim, const iVector<T, N> &t, const bool wipe = true)
 | 
			
		||||
  {
 | 
			
		||||
    if (wipe)
 | 
			
		||||
    {
 | 
			
		||||
      dim.clear();
 | 
			
		||||
    }
 | 
			
		||||
    dim.push_back(N);
 | 
			
		||||
    tensorDim(dim, t._internal[0], false);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T, int N>
 | 
			
		||||
  void tensorDim(std::vector<size_t> &dim, const iMatrix<T, N> &t, const bool wipe = true)
 | 
			
		||||
  {
 | 
			
		||||
    if (wipe)
 | 
			
		||||
    {
 | 
			
		||||
      dim.clear();
 | 
			
		||||
    }
 | 
			
		||||
    dim.push_back(N);
 | 
			
		||||
    dim.push_back(N);
 | 
			
		||||
    tensorDim(dim, t._internal[0][0], false);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  typename TensorToVec<T>::type tensorToVec(const T &t)
 | 
			
		||||
  {
 | 
			
		||||
    return t;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  typename TensorToVec<iScalar<T>>::type tensorToVec(const iScalar<T>& t)
 | 
			
		||||
  {
 | 
			
		||||
    return tensorToVec(t._internal);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T, int N>
 | 
			
		||||
  typename TensorToVec<iVector<T, N>>::type tensorToVec(const iVector<T, N>& t)
 | 
			
		||||
  {
 | 
			
		||||
    typename TensorToVec<iVector<T, N>>::type v;
 | 
			
		||||
 | 
			
		||||
    v.resize(N);
 | 
			
		||||
    for (unsigned int i = 0; i < N; i++) 
 | 
			
		||||
    {
 | 
			
		||||
      v[i] = tensorToVec(t._internal[i]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return v;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T, int N>
 | 
			
		||||
  typename TensorToVec<iMatrix<T, N>>::type tensorToVec(const iMatrix<T, N>& t)
 | 
			
		||||
  {
 | 
			
		||||
    typename TensorToVec<iMatrix<T, N>>::type v;
 | 
			
		||||
 | 
			
		||||
    v.resize(N);
 | 
			
		||||
    for (unsigned int i = 0; i < N; i++)
 | 
			
		||||
    {
 | 
			
		||||
      v[i].resize(N);
 | 
			
		||||
      for (unsigned int j = 0; j < N; j++) 
 | 
			
		||||
      {
 | 
			
		||||
        v[i][j] = tensorToVec(t._internal[i][j]);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return v;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  void vecToTensor(T &t, const typename TensorToVec<T>::type &v)
 | 
			
		||||
  {
 | 
			
		||||
    t = v;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  void vecToTensor(iScalar<T> &t, const typename TensorToVec<iScalar<T>>::type &v)
 | 
			
		||||
  {
 | 
			
		||||
    vecToTensor(t._internal, v);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T, int N>
 | 
			
		||||
  void vecToTensor(iVector<T, N> &t, const typename TensorToVec<iVector<T, N>>::type &v)
 | 
			
		||||
  {
 | 
			
		||||
    for (unsigned int i = 0; i < N; i++) 
 | 
			
		||||
    {
 | 
			
		||||
      vecToTensor(t._internal[i], v[i]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename T, int N>
 | 
			
		||||
  void vecToTensor(iMatrix<T, N> &t, const typename TensorToVec<iMatrix<T, N>>::type &v)
 | 
			
		||||
  {
 | 
			
		||||
    for (unsigned int i = 0; i < N; i++)
 | 
			
		||||
    for (unsigned int j = 0; j < N; j++)
 | 
			
		||||
    {
 | 
			
		||||
      vecToTensor(t._internal[i][j], v[i][j]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Vector element trait //////////////////////////////////////////////////////  
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  struct element
 | 
			
		||||
  {
 | 
			
		||||
    typedef T type;
 | 
			
		||||
    static constexpr bool is_number = false;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  struct element<std::vector<T>>
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename element<T>::type type;
 | 
			
		||||
    static constexpr bool is_number = std::is_arithmetic<T>::value
 | 
			
		||||
                                      or is_complex<T>::value
 | 
			
		||||
                                      or element<T>::is_number;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  // Vector flattening utility class ////////////////////////////////////////////
 | 
			
		||||
  // Class to flatten a multidimensional std::vector
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  class Flatten
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef typename element<V>::type Element;
 | 
			
		||||
  public:
 | 
			
		||||
    explicit                     Flatten(const V &vector);
 | 
			
		||||
    const V &                    getVector(void);
 | 
			
		||||
    const std::vector<Element> & getFlatVector(void);
 | 
			
		||||
    const std::vector<size_t>  & getDim(void);
 | 
			
		||||
  private:
 | 
			
		||||
    void accumulate(const Element &e);
 | 
			
		||||
    template <typename W>
 | 
			
		||||
    void accumulate(const W &v);
 | 
			
		||||
    void accumulateDim(const Element &e);
 | 
			
		||||
    template <typename W>
 | 
			
		||||
    void accumulateDim(const W &v);
 | 
			
		||||
  private:
 | 
			
		||||
    const V              &vector_;
 | 
			
		||||
    std::vector<Element> flatVector_;
 | 
			
		||||
    std::vector<size_t>  dim_;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  // Class to reconstruct a multidimensional std::vector
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  class Reconstruct
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    typedef typename element<V>::type Element;
 | 
			
		||||
  public:
 | 
			
		||||
    Reconstruct(const std::vector<Element> &flatVector,
 | 
			
		||||
                const std::vector<size_t> &dim);
 | 
			
		||||
    const V &                    getVector(void);
 | 
			
		||||
    const std::vector<Element> & getFlatVector(void);
 | 
			
		||||
    const std::vector<size_t>  & getDim(void);
 | 
			
		||||
  private:
 | 
			
		||||
    void fill(std::vector<Element> &v);
 | 
			
		||||
    template <typename W>
 | 
			
		||||
    void fill(W &v);
 | 
			
		||||
    void resize(std::vector<Element> &v, const unsigned int dim);
 | 
			
		||||
    template <typename W>
 | 
			
		||||
    void resize(W &v, const unsigned int dim);
 | 
			
		||||
  private:
 | 
			
		||||
    V                          vector_;
 | 
			
		||||
    const std::vector<Element> &flatVector_;
 | 
			
		||||
    std::vector<size_t>        dim_;
 | 
			
		||||
    size_t                     ind_{0};
 | 
			
		||||
    unsigned int               dimInd_{0};
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Flatten class template implementation
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  void Flatten<V>::accumulate(const Element &e)
 | 
			
		||||
  {
 | 
			
		||||
    flatVector_.push_back(e);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W>
 | 
			
		||||
  void Flatten<V>::accumulate(const W &v)
 | 
			
		||||
  {
 | 
			
		||||
    for (auto &e: v)
 | 
			
		||||
    {
 | 
			
		||||
      accumulate(e);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  void Flatten<V>::accumulateDim(const Element &e) {};
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W>
 | 
			
		||||
  void Flatten<V>::accumulateDim(const W &v)
 | 
			
		||||
  {
 | 
			
		||||
    dim_.push_back(v.size());
 | 
			
		||||
    accumulateDim(v[0]);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  Flatten<V>::Flatten(const V &vector)
 | 
			
		||||
  : vector_(vector)
 | 
			
		||||
  {
 | 
			
		||||
    accumulate(vector_);
 | 
			
		||||
    accumulateDim(vector_);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const V & Flatten<V>::getVector(void)
 | 
			
		||||
  {
 | 
			
		||||
    return vector_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const std::vector<typename Flatten<V>::Element> &
 | 
			
		||||
  Flatten<V>::getFlatVector(void)
 | 
			
		||||
  {
 | 
			
		||||
    return flatVector_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const std::vector<size_t> & Flatten<V>::getDim(void)
 | 
			
		||||
  {
 | 
			
		||||
    return dim_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // Reconstruct class template implementation
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  void Reconstruct<V>::fill(std::vector<Element> &v)
 | 
			
		||||
  {
 | 
			
		||||
    for (auto &e: v)
 | 
			
		||||
    {
 | 
			
		||||
      e = flatVector_[ind_++];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W>
 | 
			
		||||
  void Reconstruct<V>::fill(W &v)
 | 
			
		||||
  {
 | 
			
		||||
    for (auto &e: v)
 | 
			
		||||
    {
 | 
			
		||||
      fill(e);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  void Reconstruct<V>::resize(std::vector<Element> &v, const unsigned int dim)
 | 
			
		||||
  {
 | 
			
		||||
    v.resize(dim_[dim]);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  template <typename W>
 | 
			
		||||
  void Reconstruct<V>::resize(W &v, const unsigned int dim)
 | 
			
		||||
  {
 | 
			
		||||
    v.resize(dim_[dim]);
 | 
			
		||||
    for (auto &e: v)
 | 
			
		||||
    {
 | 
			
		||||
      resize(e, dim + 1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  Reconstruct<V>::Reconstruct(const std::vector<Element> &flatVector,
 | 
			
		||||
                              const std::vector<size_t> &dim)
 | 
			
		||||
  : flatVector_(flatVector)
 | 
			
		||||
  , dim_(dim)
 | 
			
		||||
  {
 | 
			
		||||
    resize(vector_, 0);
 | 
			
		||||
    fill(vector_);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const V & Reconstruct<V>::getVector(void)
 | 
			
		||||
  {
 | 
			
		||||
    return vector_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const std::vector<typename Reconstruct<V>::Element> &
 | 
			
		||||
  Reconstruct<V>::getFlatVector(void)
 | 
			
		||||
  {
 | 
			
		||||
    return flatVector_;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <typename V>
 | 
			
		||||
  const std::vector<size_t> & Reconstruct<V>::getDim(void)
 | 
			
		||||
  {
 | 
			
		||||
    return dim_;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Vector IO utilities ///////////////////////////////////////////////////////
 | 
			
		||||
  // helper function to read space-separated values
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  std::vector<T> strToVec(const std::string s)
 | 
			
		||||
  {
 | 
			
		||||
    std::istringstream sstr(s);
 | 
			
		||||
    T                  buf;
 | 
			
		||||
    std::vector<T>     v;
 | 
			
		||||
    
 | 
			
		||||
    while(!sstr.eof())
 | 
			
		||||
    {
 | 
			
		||||
      sstr >> buf;
 | 
			
		||||
      v.push_back(buf);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return v;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // output to streams for vectors
 | 
			
		||||
  template < class T >
 | 
			
		||||
  inline std::ostream & operator<<(std::ostream &os, const std::vector<T> &v)
 | 
			
		||||
  {
 | 
			
		||||
    os << "[";
 | 
			
		||||
    for (unsigned int i = 0; i < v.size(); ++i)
 | 
			
		||||
    {
 | 
			
		||||
      os << v[i];
 | 
			
		||||
      if (i < v.size() - 1)
 | 
			
		||||
      {
 | 
			
		||||
        os << " ";
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    os << "]";
 | 
			
		||||
    
 | 
			
		||||
    return os;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// helper function to read space-separated values
 | 
			
		||||
template <typename T>
 | 
			
		||||
std::string vecToStr(const std::vector<T> &v)
 | 
			
		||||
{
 | 
			
		||||
  using Grid::operator<<;
 | 
			
		||||
  
 | 
			
		||||
  std::ostringstream sstr;
 | 
			
		||||
 | 
			
		||||
  sstr << v;
 | 
			
		||||
 | 
			
		||||
  return sstr.str();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,188 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/serialisation/XmlIO.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 | 
			
		||||
void Grid::xmlCheckParse(const pugi::xml_parse_result &result, const std::string name)
 | 
			
		||||
{
 | 
			
		||||
  if (!result) 
 | 
			
		||||
  {
 | 
			
		||||
    std::cerr << "XML parsing error for " << name << std::endl;
 | 
			
		||||
    std::cerr << "XML error description: " << result.description() << std::endl;
 | 
			
		||||
    std::cerr << "XML error offset     : " << result.offset << std::endl;
 | 
			
		||||
    abort();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Writer implementation ///////////////////////////////////////////////////////
 | 
			
		||||
XmlWriter::XmlWriter(const std::string &fileName, std::string toplev) : fileName_(fileName)
 | 
			
		||||
{
 | 
			
		||||
  if ( toplev == std::string("") ) {
 | 
			
		||||
    node_=doc_;
 | 
			
		||||
  } else { 
 | 
			
		||||
    node_=doc_.append_child();
 | 
			
		||||
    node_.set_name(toplev.c_str());
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
XmlWriter::~XmlWriter(void)
 | 
			
		||||
{
 | 
			
		||||
  if ( fileName_ != std::string("") ) { 
 | 
			
		||||
    doc_.save_file(fileName_.c_str(), indent_.c_str());
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void XmlWriter::push(const std::string &s)
 | 
			
		||||
{
 | 
			
		||||
  node_ = node_.append_child(s.c_str());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void XmlWriter::pushXmlString(const std::string &s)
 | 
			
		||||
{
 | 
			
		||||
  pugi::xml_document doc;
 | 
			
		||||
  auto               result = doc.load_buffer(s.c_str(), s.size());
 | 
			
		||||
 | 
			
		||||
  xmlCheckParse(result, "fragment\n'" + s +"'");
 | 
			
		||||
  for (pugi::xml_node child = doc.first_child(); child; child = child.next_sibling())
 | 
			
		||||
  {
 | 
			
		||||
      node_ = node_.append_copy(child);
 | 
			
		||||
  }
 | 
			
		||||
  pop();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void XmlWriter::pop(void)
 | 
			
		||||
{
 | 
			
		||||
  node_ = node_.parent();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::string XmlWriter::docString(void)
 | 
			
		||||
{
 | 
			
		||||
  std::ostringstream oss; 
 | 
			
		||||
  doc_.save(oss, indent_.c_str());
 | 
			
		||||
  return oss.str();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::string XmlWriter::string(void)
 | 
			
		||||
{
 | 
			
		||||
  std::ostringstream oss; 
 | 
			
		||||
  doc_.save(oss, indent_.c_str(), pugi::format_default | pugi::format_no_declaration);
 | 
			
		||||
  return oss.str();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Reader implementation ///////////////////////////////////////////////////////
 | 
			
		||||
XmlReader::XmlReader(const std::string &s,  const bool isBuffer, 
 | 
			
		||||
                     std::string toplev) 
 | 
			
		||||
{
 | 
			
		||||
  pugi::xml_parse_result result;
 | 
			
		||||
  
 | 
			
		||||
  if (isBuffer)
 | 
			
		||||
  {
 | 
			
		||||
    fileName_ = "<string>";
 | 
			
		||||
    result    = doc_.load_string(s.c_str());
 | 
			
		||||
    xmlCheckParse(result, "string\n'" + s + "'");
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    fileName_ = s;
 | 
			
		||||
    result    = doc_.load_file(s.c_str());
 | 
			
		||||
    xmlCheckParse(result, "file '" + fileName_ + "'");
 | 
			
		||||
  }
 | 
			
		||||
  if ( toplev == std::string("") ) {
 | 
			
		||||
  node_ = doc_;
 | 
			
		||||
  } else { 
 | 
			
		||||
    node_ = doc_.child(toplev.c_str());
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define XML_SAFE_NODE(expr)\
 | 
			
		||||
if (expr)\
 | 
			
		||||
{\
 | 
			
		||||
  node_ = expr;\
 | 
			
		||||
  return true;\
 | 
			
		||||
}\
 | 
			
		||||
else\
 | 
			
		||||
{\
 | 
			
		||||
  return false;\
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool XmlReader::push(const std::string &s)
 | 
			
		||||
{
 | 
			
		||||
  if (s.empty())
 | 
			
		||||
  {
 | 
			
		||||
    XML_SAFE_NODE(node_.first_child());
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    XML_SAFE_NODE(node_.child(s.c_str()));
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void XmlReader::pop(void)
 | 
			
		||||
{
 | 
			
		||||
  node_ = node_.parent();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool XmlReader::nextElement(const std::string &s)
 | 
			
		||||
{
 | 
			
		||||
  if (s.empty())
 | 
			
		||||
  {
 | 
			
		||||
    XML_SAFE_NODE(node_.next_sibling());
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    XML_SAFE_NODE(node_.next_sibling(s.c_str()));
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void XmlReader::readCurrentSubtree(std::string &s)
 | 
			
		||||
{
 | 
			
		||||
  std::ostringstream oss; 
 | 
			
		||||
  pugi::xml_document doc;
 | 
			
		||||
 | 
			
		||||
  doc.append_copy(node_);
 | 
			
		||||
  doc.save(oss, indent_.c_str(), pugi::format_default | pugi::format_no_declaration);
 | 
			
		||||
  s = oss.str();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
void XmlReader::readDefault(const std::string &s, std::string &output)
 | 
			
		||||
{
 | 
			
		||||
  if (node_.child(s.c_str()))
 | 
			
		||||
  {
 | 
			
		||||
    output = node_.child(s.c_str()).first_child().value();
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    std::cout << GridLogWarning << "XML: cannot open node '" << s << "'";
 | 
			
		||||
    std::cout << std::endl;
 | 
			
		||||
 | 
			
		||||
    output = ""; 
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@@ -1,599 +0,0 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/simd/Grid_neon.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
    Author: Nils Meyer <nils.meyer@ur.de>
 | 
			
		||||
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 | 
			
		||||
  ARMv8 NEON intrinsics layer by
 | 
			
		||||
 | 
			
		||||
  Nils Meyer <nils.meyer@ur.de>,
 | 
			
		||||
  University of Regensburg, Germany
 | 
			
		||||
  SFB/TRR55
 | 
			
		||||
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
#ifndef GEN_SIMD_WIDTH
 | 
			
		||||
#define GEN_SIMD_WIDTH 16u
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include "Grid_generic_types.h"
 | 
			
		||||
#include <arm_neon.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace Optimization {
 | 
			
		||||
 | 
			
		||||
  template<class vtype>
 | 
			
		||||
  union uconv {
 | 
			
		||||
    float32x4_t f;
 | 
			
		||||
    vtype v;
 | 
			
		||||
  };
 | 
			
		||||
  union u128f {
 | 
			
		||||
    float32x4_t v;
 | 
			
		||||
    float f[4];
 | 
			
		||||
  };
 | 
			
		||||
  union u128d {
 | 
			
		||||
    float64x2_t v;
 | 
			
		||||
    double f[2];
 | 
			
		||||
  };
 | 
			
		||||
  // half precision
 | 
			
		||||
  union u128h {
 | 
			
		||||
    float16x8_t v;
 | 
			
		||||
    uint16_t f[8];
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Vsplat{
 | 
			
		||||
    //Complex float
 | 
			
		||||
    inline float32x4_t operator()(float a, float b){
 | 
			
		||||
      float tmp[4]={a,b,a,b};
 | 
			
		||||
      return vld1q_f32(tmp);
 | 
			
		||||
    }
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline float32x4_t operator()(float a){
 | 
			
		||||
      return vdupq_n_f32(a);
 | 
			
		||||
    }
 | 
			
		||||
    //Complex double
 | 
			
		||||
    inline float64x2_t operator()(double a, double b){
 | 
			
		||||
      double tmp[2]={a,b};
 | 
			
		||||
      return vld1q_f64(tmp);
 | 
			
		||||
    }
 | 
			
		||||
    //Real double
 | 
			
		||||
    inline float64x2_t operator()(double a){
 | 
			
		||||
      return vdupq_n_f64(a);
 | 
			
		||||
    }
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline uint32x4_t operator()(Integer a){
 | 
			
		||||
      return vdupq_n_u32(a);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Vstore{
 | 
			
		||||
    //Float
 | 
			
		||||
    inline void operator()(float32x4_t a, float* F){
 | 
			
		||||
      vst1q_f32(F, a);
 | 
			
		||||
    }
 | 
			
		||||
    //Double
 | 
			
		||||
    inline void operator()(float64x2_t a, double* D){
 | 
			
		||||
      vst1q_f64(D, a);
 | 
			
		||||
    }
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline void operator()(uint32x4_t a, Integer* I){
 | 
			
		||||
      vst1q_u32(I, a);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Vstream{ // N:equivalents to _mm_stream_p* in NEON?
 | 
			
		||||
    //Float // N:generic
 | 
			
		||||
    inline void operator()(float * a, float32x4_t b){
 | 
			
		||||
      memcpy(a,&b,4*sizeof(float));
 | 
			
		||||
    }
 | 
			
		||||
    //Double // N:generic
 | 
			
		||||
    inline void operator()(double * a, float64x2_t b){
 | 
			
		||||
      memcpy(a,&b,2*sizeof(double));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Nils: Vset untested; not used currently in Grid at all;
 | 
			
		||||
  // git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b
 | 
			
		||||
  struct Vset{
 | 
			
		||||
    // Complex float
 | 
			
		||||
    inline float32x4_t operator()(Grid::ComplexF *a){
 | 
			
		||||
      float tmp[4]={a[1].imag(),a[1].real(),a[0].imag(),a[0].real()};
 | 
			
		||||
      return vld1q_f32(tmp);
 | 
			
		||||
    }
 | 
			
		||||
    // Complex double
 | 
			
		||||
    inline float64x2_t operator()(Grid::ComplexD *a){
 | 
			
		||||
      double tmp[2]={a[0].imag(),a[0].real()};
 | 
			
		||||
      return vld1q_f64(tmp);
 | 
			
		||||
    }
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline float32x4_t operator()(float *a){
 | 
			
		||||
      float tmp[4]={a[3],a[2],a[1],a[0]};
 | 
			
		||||
      return vld1q_f32(tmp);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline float64x2_t operator()(double *a){
 | 
			
		||||
      double tmp[2]={a[1],a[0]};
 | 
			
		||||
      return vld1q_f64(tmp);
 | 
			
		||||
    }
 | 
			
		||||
    // Integer
 | 
			
		||||
    inline uint32x4_t operator()(Integer *a){
 | 
			
		||||
      return vld1q_dup_u32(a);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template <typename Out_type, typename In_type>
 | 
			
		||||
  struct Reduce{
 | 
			
		||||
    //Need templated class to overload output type
 | 
			
		||||
    //General form must generate error if compiled
 | 
			
		||||
      inline Out_type operator()(In_type in){
 | 
			
		||||
      printf("Error, using wrong Reduce function\n");
 | 
			
		||||
      exit(1);
 | 
			
		||||
      return 0;
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  struct Sum{
 | 
			
		||||
    //Complex/Real float
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
			
		||||
      return vaddq_f32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    //Complex/Real double
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
			
		||||
      return vaddq_f64(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
			
		||||
      return vaddq_u32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Sub{
 | 
			
		||||
    //Complex/Real float
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
			
		||||
      return vsubq_f32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    //Complex/Real double
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
			
		||||
      return vsubq_f64(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
			
		||||
      return vsubq_u32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct MultRealPart{
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
			
		||||
      float32x4_t re = vtrn1q_f32(a, a);
 | 
			
		||||
      return vmulq_f32(re, b);
 | 
			
		||||
    }
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
			
		||||
      float64x2_t re = vzip1q_f64(a, a);
 | 
			
		||||
      return vmulq_f64(re, b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct MaddRealPart{
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b, float32x4_t c){
 | 
			
		||||
      float32x4_t re = vtrn1q_f32(a, a);
 | 
			
		||||
      return vfmaq_f32(c, re, b);
 | 
			
		||||
    }
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t a, float64x2_t b, float64x2_t c){
 | 
			
		||||
      float64x2_t re = vzip1q_f64(a, a);
 | 
			
		||||
      return vfmaq_f64(c, re, b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Div{
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
			
		||||
      return vdivq_f32(a, b);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
			
		||||
      return vdivq_f64(a, b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct MultComplex{
 | 
			
		||||
    // Complex float
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
			
		||||
 | 
			
		||||
      float32x4_t r0, r1, r2, r3, r4;
 | 
			
		||||
 | 
			
		||||
      // a = ar ai Ar Ai
 | 
			
		||||
      // b = br bi Br Bi
 | 
			
		||||
      // collect real/imag part, negate bi and Bi
 | 
			
		||||
      r0 = vtrn1q_f32(b, b);       //  br  br  Br  Br
 | 
			
		||||
      r1 = vnegq_f32(b);           // -br -bi -Br -Bi
 | 
			
		||||
      r2 = vtrn2q_f32(b, r1);      //  bi -bi  Bi -Bi
 | 
			
		||||
 | 
			
		||||
      // the fun part
 | 
			
		||||
      r3 = vmulq_f32(r2, a);       //  bi*ar -bi*ai ...
 | 
			
		||||
      r4 = vrev64q_f32(r3);        // -bi*ai  bi*ar ...
 | 
			
		||||
 | 
			
		||||
      // fma(a,b,c) = a+b*c
 | 
			
		||||
      return vfmaq_f32(r4, r0, a); //  ar*br-ai*bi ai*br+ar*bi ...
 | 
			
		||||
 | 
			
		||||
      // no fma, use mul and add
 | 
			
		||||
      // float32x4_t r5;
 | 
			
		||||
      // r5 = vmulq_f32(r0, a);
 | 
			
		||||
      // return vaddq_f32(r4, r5);
 | 
			
		||||
    }
 | 
			
		||||
    // Complex double
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
			
		||||
 | 
			
		||||
      float64x2_t r0, r1, r2, r3, r4;
 | 
			
		||||
 | 
			
		||||
      // b = br bi
 | 
			
		||||
      // collect real/imag part, negate bi
 | 
			
		||||
      r0 = vtrn1q_f64(b, b);       //  br  br
 | 
			
		||||
      r1 = vnegq_f64(b);           // -br -bi
 | 
			
		||||
      r2 = vtrn2q_f64(b, r1);      //  bi -bi
 | 
			
		||||
 | 
			
		||||
      // the fun part
 | 
			
		||||
      r3 = vmulq_f64(r2, a);       //  bi*ar -bi*ai
 | 
			
		||||
      r4 = vextq_f64(r3,r3,1);     // -bi*ai  bi*ar
 | 
			
		||||
 | 
			
		||||
      // fma(a,b,c) = a+b*c
 | 
			
		||||
      return vfmaq_f64(r4, r0, a); //  ar*br-ai*bi ai*br+ar*bi
 | 
			
		||||
 | 
			
		||||
      // no fma, use mul and add
 | 
			
		||||
      // float64x2_t r5;
 | 
			
		||||
      // r5 = vmulq_f64(r0, a);
 | 
			
		||||
      // return vaddq_f64(r4, r5);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Mult{
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline float32x4_t mac(float32x4_t a, float32x4_t b, float32x4_t c){
 | 
			
		||||
      //return vaddq_f32(vmulq_f32(b,c),a);
 | 
			
		||||
      return vfmaq_f32(a, b, c);
 | 
			
		||||
    }
 | 
			
		||||
    inline float64x2_t mac(float64x2_t a, float64x2_t b, float64x2_t c){
 | 
			
		||||
      //return vaddq_f64(vmulq_f64(b,c),a);
 | 
			
		||||
      return vfmaq_f64(a, b, c);
 | 
			
		||||
    }
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t a, float32x4_t b){
 | 
			
		||||
      return vmulq_f32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t a, float64x2_t b){
 | 
			
		||||
      return vmulq_f64(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    // Integer
 | 
			
		||||
    inline uint32x4_t operator()(uint32x4_t a, uint32x4_t b){
 | 
			
		||||
      return vmulq_u32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Conj{
 | 
			
		||||
    // Complex single
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t in){
 | 
			
		||||
      // ar ai br bi -> ar -ai br -bi
 | 
			
		||||
      float32x4_t r0, r1;
 | 
			
		||||
      r0 = vnegq_f32(in);        // -ar -ai -br -bi
 | 
			
		||||
      r1 = vrev64q_f32(r0);      // -ai -ar -bi -br
 | 
			
		||||
      return vtrn1q_f32(in, r1); //  ar -ai  br -bi
 | 
			
		||||
    }
 | 
			
		||||
    // Complex double
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t in){
 | 
			
		||||
 | 
			
		||||
      float64x2_t r0, r1;
 | 
			
		||||
      r0 = vextq_f64(in, in, 1);    //  ai  ar
 | 
			
		||||
      r1 = vnegq_f64(r0);           // -ai -ar
 | 
			
		||||
      return vextq_f64(r0, r1, 1);  //  ar -ai
 | 
			
		||||
    }
 | 
			
		||||
    // do not define for integer input
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct TimesMinusI{
 | 
			
		||||
    //Complex single
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
 | 
			
		||||
      // ar ai br bi -> ai -ar ai -br
 | 
			
		||||
      float32x4_t r0, r1;
 | 
			
		||||
      r0 = vnegq_f32(in);        // -ar -ai -br -bi
 | 
			
		||||
      r1 = vrev64q_f32(in);      //  ai  ar  bi  br
 | 
			
		||||
      return vtrn1q_f32(r1, r0); //  ar -ai  br -bi
 | 
			
		||||
    }
 | 
			
		||||
    //Complex double
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
 | 
			
		||||
      // a ib -> b -ia
 | 
			
		||||
      float64x2_t tmp;
 | 
			
		||||
      tmp = vnegq_f64(in);
 | 
			
		||||
      return vextq_f64(in, tmp, 1);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct TimesI{
 | 
			
		||||
    //Complex single
 | 
			
		||||
    inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
 | 
			
		||||
      // ar ai br bi -> -ai ar -bi br
 | 
			
		||||
      float32x4_t r0, r1;
 | 
			
		||||
      r0 = vnegq_f32(in);        // -ar -ai -br -bi
 | 
			
		||||
      r1 = vrev64q_f32(r0);      // -ai -ar -bi -br
 | 
			
		||||
      return vtrn1q_f32(r1, in); // -ai  ar -bi  br
 | 
			
		||||
    }
 | 
			
		||||
    //Complex double
 | 
			
		||||
    inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
 | 
			
		||||
      // a ib -> -b ia
 | 
			
		||||
      float64x2_t tmp;
 | 
			
		||||
      tmp = vnegq_f64(in);
 | 
			
		||||
      return vextq_f64(tmp, in, 1);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Permute{
 | 
			
		||||
 | 
			
		||||
    static inline float32x4_t Permute0(float32x4_t in){ // N:ok
 | 
			
		||||
      // AB CD -> CD AB
 | 
			
		||||
      return vextq_f32(in, in, 2);
 | 
			
		||||
    };
 | 
			
		||||
    static inline float32x4_t Permute1(float32x4_t in){ // N:ok
 | 
			
		||||
      // AB CD -> BA DC
 | 
			
		||||
      return vrev64q_f32(in);
 | 
			
		||||
    };
 | 
			
		||||
    static inline float32x4_t Permute2(float32x4_t in){ // N:not used by Boyle
 | 
			
		||||
      return in;
 | 
			
		||||
    };
 | 
			
		||||
    static inline float32x4_t Permute3(float32x4_t in){ // N:not used by Boyle
 | 
			
		||||
      return in;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    static inline float64x2_t Permute0(float64x2_t in){ // N:ok
 | 
			
		||||
      // AB -> BA
 | 
			
		||||
      return vextq_f64(in, in, 1);
 | 
			
		||||
    };
 | 
			
		||||
    static inline float64x2_t Permute1(float64x2_t in){ // N:not used by Boyle
 | 
			
		||||
      return in;
 | 
			
		||||
    };
 | 
			
		||||
    static inline float64x2_t Permute2(float64x2_t in){ // N:not used by Boyle
 | 
			
		||||
      return in;
 | 
			
		||||
    };
 | 
			
		||||
    static inline float64x2_t Permute3(float64x2_t in){ // N:not used by Boyle
 | 
			
		||||
      return in;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Rotate{
 | 
			
		||||
 | 
			
		||||
    static inline float32x4_t rotate(float32x4_t in,int n){ // N:ok
 | 
			
		||||
      switch(n){
 | 
			
		||||
      case 0: // AB CD -> AB CD
 | 
			
		||||
        return tRotate<0>(in);
 | 
			
		||||
        break;
 | 
			
		||||
      case 1: // AB CD -> BC DA
 | 
			
		||||
        return tRotate<1>(in);
 | 
			
		||||
        break;
 | 
			
		||||
      case 2: // AB CD -> CD AB
 | 
			
		||||
        return tRotate<2>(in);
 | 
			
		||||
        break;
 | 
			
		||||
      case 3: // AB CD -> DA BC
 | 
			
		||||
        return tRotate<3>(in);
 | 
			
		||||
        break;
 | 
			
		||||
      default: assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    static inline float64x2_t rotate(float64x2_t in,int n){ // N:ok
 | 
			
		||||
      switch(n){
 | 
			
		||||
      case 0: // AB -> AB
 | 
			
		||||
        return tRotate<0>(in);
 | 
			
		||||
        break;
 | 
			
		||||
      case 1: // AB -> BA
 | 
			
		||||
        return tRotate<1>(in);
 | 
			
		||||
        break;
 | 
			
		||||
      default: assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<int n> static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n%4); };
 | 
			
		||||
    template<int n> static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n%2); };
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct PrecisionChange {
 | 
			
		||||
 | 
			
		||||
    static inline float16x8_t StoH (const float32x4_t &a,const float32x4_t &b) {
 | 
			
		||||
      float16x4_t h = vcvt_f16_f32(a);
 | 
			
		||||
      return vcvt_high_f16_f32(h, b);
 | 
			
		||||
    }
 | 
			
		||||
    static inline void  HtoS (float16x8_t h,float32x4_t &sa,float32x4_t &sb) {
 | 
			
		||||
      sb = vcvt_high_f32_f16(h);
 | 
			
		||||
      // there is no direct conversion from lower float32x4_t to float64x2_t
 | 
			
		||||
      // vextq_f16 not supported by clang 3.8 / 4.0 / arm clang
 | 
			
		||||
      // float16x8_t h1 = vextq_f16(h, h, 4); // correct, but not supported by clang
 | 
			
		||||
      // workaround for clang
 | 
			
		||||
      uint32x4_t h1u = reinterpret_cast<uint32x4_t>(h);
 | 
			
		||||
      float16x8_t h1 = reinterpret_cast<float16x8_t>(vextq_u32(h1u, h1u, 2));
 | 
			
		||||
      sa = vcvt_high_f32_f16(h1);
 | 
			
		||||
    }
 | 
			
		||||
    static inline float32x4_t DtoS (float64x2_t a,float64x2_t b) {
 | 
			
		||||
      float32x2_t s = vcvt_f32_f64(a);
 | 
			
		||||
      return vcvt_high_f32_f64(s, b);
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    static inline void StoD (float32x4_t s,float64x2_t &a,float64x2_t &b) {
 | 
			
		||||
      b = vcvt_high_f64_f32(s);
 | 
			
		||||
      // there is no direct conversion from lower float32x4_t to float64x2_t
 | 
			
		||||
      float32x4_t s1 = vextq_f32(s, s, 2);
 | 
			
		||||
      a = vcvt_high_f64_f32(s1);
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    static inline float16x8_t DtoH (float64x2_t a,float64x2_t b,float64x2_t c,float64x2_t d) {
 | 
			
		||||
      float32x4_t s1 = DtoS(a, b);
 | 
			
		||||
      float32x4_t s2 = DtoS(c, d);
 | 
			
		||||
      return StoH(s1, s2);
 | 
			
		||||
    }
 | 
			
		||||
    static inline void HtoD (float16x8_t h,float64x2_t &a,float64x2_t &b,float64x2_t &c,float64x2_t &d) {
 | 
			
		||||
      float32x4_t s1, s2;
 | 
			
		||||
      HtoS(h, s1, s2);
 | 
			
		||||
      StoD(s1, a, b);
 | 
			
		||||
      StoD(s2, c, d);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////
 | 
			
		||||
  // Exchange support
 | 
			
		||||
 | 
			
		||||
  struct Exchange{
 | 
			
		||||
    static inline void Exchange0(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){
 | 
			
		||||
      // in1: ABCD -> out1: ABEF
 | 
			
		||||
      // in2: EFGH -> out2: CDGH
 | 
			
		||||
 | 
			
		||||
      // z: CDAB
 | 
			
		||||
      float32x4_t z = vextq_f32(in1, in1, 2);
 | 
			
		||||
      // out1: ABEF
 | 
			
		||||
      out1 = vextq_f32(z, in2, 2);
 | 
			
		||||
 | 
			
		||||
      // z: GHEF
 | 
			
		||||
      z = vextq_f32(in2, in2, 2);
 | 
			
		||||
      // out2: CDGH
 | 
			
		||||
      out2 = vextq_f32(in1, z, 2);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    static inline void Exchange1(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){
 | 
			
		||||
      // in1: ABCD -> out1: AECG
 | 
			
		||||
      // in2: EFGH -> out2: BFDH
 | 
			
		||||
      out1 = vtrn1q_f32(in1, in2);
 | 
			
		||||
      out2 = vtrn2q_f32(in1, in2);
 | 
			
		||||
    };
 | 
			
		||||
    static inline void Exchange2(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){
 | 
			
		||||
      assert(0);
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
    static inline void Exchange3(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){
 | 
			
		||||
      assert(0);
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
    // double precision
 | 
			
		||||
    static inline void Exchange0(float64x2_t &out1,float64x2_t &out2,float64x2_t in1,float64x2_t in2){
 | 
			
		||||
      // in1: AB -> out1: AC
 | 
			
		||||
      // in2: CD -> out2: BD
 | 
			
		||||
      out1 = vzip1q_f64(in1, in2);
 | 
			
		||||
      out2 = vzip2q_f64(in1, in2);
 | 
			
		||||
    };
 | 
			
		||||
    static inline void Exchange1(float64x2_t &out1,float64x2_t &out2,float64x2_t in1,float64x2_t in2){
 | 
			
		||||
      assert(0);
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
    static inline void Exchange2(float64x2_t &out1,float64x2_t &out2,float64x2_t in1,float64x2_t in2){
 | 
			
		||||
      assert(0);
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
    static inline void Exchange3(float64x2_t &out1,float64x2_t &out2,float64x2_t in1,float64x2_t in2){
 | 
			
		||||
      assert(0);
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////
 | 
			
		||||
  // Some Template specialization
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  //Complex float Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
  inline Grid::ComplexF Reduce<Grid::ComplexF, float32x4_t>::operator()(float32x4_t in){
 | 
			
		||||
    float32x4_t v1; // two complex
 | 
			
		||||
    v1 = Optimization::Permute::Permute0(in);
 | 
			
		||||
    v1 = vaddq_f32(v1,in);
 | 
			
		||||
    u128f conv;    conv.v=v1;
 | 
			
		||||
    return Grid::ComplexF(conv.f[0],conv.f[1]);
 | 
			
		||||
  }
 | 
			
		||||
  //Real float Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
  inline Grid::RealF Reduce<Grid::RealF, float32x4_t>::operator()(float32x4_t in){
 | 
			
		||||
    return vaddvq_f32(in);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  //Complex double Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
  inline Grid::ComplexD Reduce<Grid::ComplexD, float64x2_t>::operator()(float64x2_t in){
 | 
			
		||||
    u128d conv; conv.v = in;
 | 
			
		||||
    return Grid::ComplexD(conv.f[0],conv.f[1]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //Real double Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
  inline Grid::RealD Reduce<Grid::RealD, float64x2_t>::operator()(float64x2_t in){
 | 
			
		||||
    return vaddvq_f64(in);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //Integer Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
  inline Integer Reduce<Integer, uint32x4_t>::operator()(uint32x4_t in){
 | 
			
		||||
    return vaddvq_u32(in);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Here assign types
 | 
			
		||||
 | 
			
		||||
// typedef Optimization::vech SIMD_Htype; // Reduced precision type
 | 
			
		||||
  typedef float16x8_t  SIMD_Htype; // Half precision type
 | 
			
		||||
  typedef float32x4_t  SIMD_Ftype; // Single precision type
 | 
			
		||||
  typedef float64x2_t  SIMD_Dtype; // Double precision type
 | 
			
		||||
  typedef uint32x4_t   SIMD_Itype; // Integer type
 | 
			
		||||
 | 
			
		||||
  inline void v_prefetch0(int size, const char *ptr){};  // prefetch utilities
 | 
			
		||||
  inline void prefetch_HINT_T0(const char *ptr){};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Function name aliases
 | 
			
		||||
  typedef Optimization::Vsplat   VsplatSIMD;
 | 
			
		||||
  typedef Optimization::Vstore   VstoreSIMD;
 | 
			
		||||
  typedef Optimization::Vset     VsetSIMD;
 | 
			
		||||
  typedef Optimization::Vstream  VstreamSIMD;
 | 
			
		||||
  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  typedef Optimization::Sum         SumSIMD;
 | 
			
		||||
  typedef Optimization::Sub         SubSIMD;
 | 
			
		||||
  typedef Optimization::Div         DivSIMD;
 | 
			
		||||
  typedef Optimization::Mult        MultSIMD;
 | 
			
		||||
  typedef Optimization::MultComplex MultComplexSIMD;
 | 
			
		||||
  typedef Optimization::MultRealPart MultRealPartSIMD;
 | 
			
		||||
  typedef Optimization::MaddRealPart MaddRealPartSIMD;
 | 
			
		||||
  typedef Optimization::Conj        ConjSIMD;
 | 
			
		||||
  typedef Optimization::TimesMinusI TimesMinusISIMD;
 | 
			
		||||
  typedef Optimization::TimesI      TimesISIMD;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1,2 +0,0 @@
 | 
			
		||||
downloaded file from : http://www.sitmo.com/wp-content/uploads/2016/03/prng_engine.hpp
 | 
			
		||||
Unmodified beyond filename
 | 
			
		||||
@@ -1,144 +0,0 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/tensors/Tensor_exp.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATH_EXP_H
 | 
			
		||||
#define GRID_MATH_EXP_H
 | 
			
		||||
 | 
			
		||||
#define DEFAULT_MAT_EXP 12
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////// 
 | 
			
		||||
  // Exponentiate function for scalar, vector, matrix
 | 
			
		||||
  /////////////////////////////////////////////// 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, RealD alpha ,  Integer Nexp = DEFAULT_MAT_EXP)
 | 
			
		||||
    {
 | 
			
		||||
      iScalar<vtype> ret;
 | 
			
		||||
      ret._internal = Exponentiate(r._internal, alpha, Nexp);
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
template<class vtype, int N> inline iVector<vtype, N> Exponentiate(const iVector<vtype,N>&r, RealD alpha ,  Integer Nexp = DEFAULT_MAT_EXP)
 | 
			
		||||
    {
 | 
			
		||||
      iVector<vtype, N> ret;
 | 
			
		||||
      for (int i = 0; i < N; i++)
 | 
			
		||||
        ret._internal[i] = Exponentiate(r._internal[i], alpha, Nexp);
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // Specialisation: Cayley-Hamilton exponential for SU(3)
 | 
			
		||||
    template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr> 
 | 
			
		||||
    inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP )
 | 
			
		||||
    {
 | 
			
		||||
    // for SU(3) 2x faster than the std implementation using Nexp=12
 | 
			
		||||
    // notice that it actually computes
 | 
			
		||||
    // exp ( input matrix )
 | 
			
		||||
    // the i sign is coming from outside
 | 
			
		||||
    // input matrix is anti-hermitian NOT hermitian
 | 
			
		||||
      typedef iMatrix<vtype,3> mat;
 | 
			
		||||
      typedef iScalar<vtype> scalar;
 | 
			
		||||
      mat unit(1.0);
 | 
			
		||||
      mat temp(unit);
 | 
			
		||||
      const Complex one_over_three = 1.0 / 3.0;
 | 
			
		||||
      const Complex one_over_two = 1.0 / 2.0;
 | 
			
		||||
 | 
			
		||||
      scalar c0, c1, tmp, c0max, theta, u, w;
 | 
			
		||||
      scalar xi0, u2, w2, cosw;
 | 
			
		||||
      scalar fden, h0, h1, h2;
 | 
			
		||||
      scalar e2iu, emiu, ixi0, qt;
 | 
			
		||||
      scalar f0, f1, f2;
 | 
			
		||||
      scalar unity(1.0);
 | 
			
		||||
      
 | 
			
		||||
      mat iQ2 = arg*arg*alpha*alpha;
 | 
			
		||||
      mat iQ3 = arg*iQ2*alpha;   
 | 
			
		||||
      // sign in c0 from the conventions on the Ta
 | 
			
		||||
      scalar imQ3, reQ2;
 | 
			
		||||
      imQ3 = imag( trace(iQ3) );
 | 
			
		||||
      reQ2 = real( trace(iQ2) );
 | 
			
		||||
      c0 = -imQ3 * one_over_three;  
 | 
			
		||||
      c1 = -reQ2 * one_over_two;
 | 
			
		||||
 | 
			
		||||
      // Cayley Hamilton checks to machine precision, tested
 | 
			
		||||
      tmp = c1 * one_over_three;
 | 
			
		||||
      c0max = 2.0 * pow(tmp, 1.5);
 | 
			
		||||
 | 
			
		||||
      theta = acos(c0 / c0max) * one_over_three;
 | 
			
		||||
      u = sqrt(tmp) * cos(theta);
 | 
			
		||||
      w = sqrt(c1) * sin(theta);
 | 
			
		||||
 | 
			
		||||
      xi0 = sin(w) / w;
 | 
			
		||||
      u2 = u * u;
 | 
			
		||||
      w2 = w * w;
 | 
			
		||||
      cosw = cos(w);
 | 
			
		||||
 | 
			
		||||
      ixi0 = timesI(xi0);
 | 
			
		||||
      emiu = cos(u) - timesI(sin(u));
 | 
			
		||||
      e2iu = cos(2.0 * u) + timesI(sin(2.0 * u));
 | 
			
		||||
 | 
			
		||||
      h0 = e2iu * (u2 - w2) +
 | 
			
		||||
           emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0));
 | 
			
		||||
      h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0);
 | 
			
		||||
      h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0);
 | 
			
		||||
 | 
			
		||||
      fden = unity / (9.0 * u2 - w2);  // reals
 | 
			
		||||
      f0 = h0 * fden;
 | 
			
		||||
      f1 = h1 * fden;
 | 
			
		||||
      f2 = h2 * fden;
 | 
			
		||||
 | 
			
		||||
      return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// General exponential
 | 
			
		||||
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> 
 | 
			
		||||
    inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP )
 | 
			
		||||
    {
 | 
			
		||||
    // notice that it actually computes
 | 
			
		||||
    // exp ( input matrix )
 | 
			
		||||
    // the i sign is coming from outside
 | 
			
		||||
    // input matrix is anti-hermitian NOT hermitian
 | 
			
		||||
      typedef iMatrix<vtype,N> mat;
 | 
			
		||||
      mat unit(1.0);
 | 
			
		||||
      mat temp(unit);
 | 
			
		||||
      for(int i=Nexp; i>=1;--i){
 | 
			
		||||
	      temp *= alpha/RealD(i);
 | 
			
		||||
	      temp = unit + temp*arg;
 | 
			
		||||
      }
 | 
			
		||||
      return temp;
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user