mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Merge branch 'develop' into feature/hirep
This commit is contained in:
		
							
								
								
									
										25
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										25
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -5,7 +5,6 @@
 | 
			
		||||
*.o
 | 
			
		||||
*.obj
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Editor files #
 | 
			
		||||
################
 | 
			
		||||
*~
 | 
			
		||||
@@ -48,6 +47,7 @@ Config.h.in
 | 
			
		||||
config.log
 | 
			
		||||
config.status
 | 
			
		||||
.deps
 | 
			
		||||
*.inc
 | 
			
		||||
 | 
			
		||||
# http://www.gnu.org/software/autoconf #
 | 
			
		||||
########################################
 | 
			
		||||
@@ -63,19 +63,7 @@ config.sub
 | 
			
		||||
config.guess
 | 
			
		||||
INSTALL
 | 
			
		||||
.dirstamp
 | 
			
		||||
 | 
			
		||||
# Packages #
 | 
			
		||||
############
 | 
			
		||||
# it's better to unpack these files and commit the raw source
 | 
			
		||||
# git has its own built in compression methods
 | 
			
		||||
*.7z
 | 
			
		||||
*.dmg
 | 
			
		||||
*.gz
 | 
			
		||||
*.iso
 | 
			
		||||
*.jar
 | 
			
		||||
*.rar
 | 
			
		||||
*.tar
 | 
			
		||||
*.zip
 | 
			
		||||
ltmain.sh
 | 
			
		||||
 
 | 
			
		||||
# Logs and databases #
 | 
			
		||||
######################
 | 
			
		||||
@@ -101,3 +89,12 @@ build*/*
 | 
			
		||||
#####################
 | 
			
		||||
*.xcodeproj/*
 | 
			
		||||
build.sh
 | 
			
		||||
 | 
			
		||||
# Eigen source #
 | 
			
		||||
################
 | 
			
		||||
lib/Eigen/*
 | 
			
		||||
 | 
			
		||||
# libtool macros #
 | 
			
		||||
##################
 | 
			
		||||
m4/lt*
 | 
			
		||||
m4/libtool.m4
 | 
			
		||||
							
								
								
									
										25
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										25
									
								
								.travis.yml
									
									
									
									
									
								
							@@ -23,6 +23,8 @@ matrix:
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-4.9
 | 
			
		||||
    - compiler: gcc
 | 
			
		||||
@@ -35,6 +37,8 @@ matrix:
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-5
 | 
			
		||||
    - compiler: clang
 | 
			
		||||
@@ -47,6 +51,8 @@ matrix:
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
 | 
			
		||||
    - compiler: clang
 | 
			
		||||
@@ -59,6 +65,8 @@ matrix:
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - libopenmpi-dev
 | 
			
		||||
            - openmpi-bin
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
 | 
			
		||||
      
 | 
			
		||||
@@ -69,6 +77,7 @@ before_install:
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
 | 
			
		||||
    
 | 
			
		||||
install:
 | 
			
		||||
@@ -82,14 +91,20 @@ install:
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
 | 
			
		||||
    
 | 
			
		||||
script:
 | 
			
		||||
    - ./autogen.sh
 | 
			
		||||
    - ./bootstrap.sh
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=single --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j1 -C prerequisites
 | 
			
		||||
    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j4 
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1
 | 
			
		||||
    - echo make clean
 | 
			
		||||
    - ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=double --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1
 | 
			
		||||
    - echo make clean
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
 | 
			
		||||
    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,5 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include/
 | 
			
		||||
SUBDIRS = lib benchmarks tests
 | 
			
		||||
 | 
			
		||||
SUBDIRS = prerequisites lib benchmarks tests
 | 
			
		||||
 | 
			
		||||
filelist: $(SUBDIRS)
 | 
			
		||||
AM_CXXFLAGS += -I$(top_builddir)/include
 | 
			
		||||
ACLOCAL_AMFLAGS = -I m4
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										110
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										110
									
								
								README.md
									
									
									
									
									
								
							@@ -1,8 +1,28 @@
 | 
			
		||||
# Grid [](https://travis-ci.org/paboyle/Grid)
 | 
			
		||||
Data parallel C++ mathematical object library
 | 
			
		||||
# Grid
 | 
			
		||||
<table>
 | 
			
		||||
<tr>
 | 
			
		||||
    <td>Last stable release</td>
 | 
			
		||||
    <td><a href="https://travis-ci.org/paboyle/Grid">
 | 
			
		||||
    <img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
 | 
			
		||||
    </td>
 | 
			
		||||
</tr>
 | 
			
		||||
<tr>
 | 
			
		||||
    <td>Development branch</td>
 | 
			
		||||
    <td><a href="https://travis-ci.org/paboyle/Grid">
 | 
			
		||||
    <img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
 | 
			
		||||
    </td>
 | 
			
		||||
</tr>
 | 
			
		||||
</table>
 | 
			
		||||
 | 
			
		||||
Last update 2015/7/30
 | 
			
		||||
**Data parallel C++ mathematical object library.**
 | 
			
		||||
 | 
			
		||||
Please send all pull requests to the `develop` branch.
 | 
			
		||||
 | 
			
		||||
License: GPL v2.
 | 
			
		||||
 | 
			
		||||
Last update 2016/08/03.
 | 
			
		||||
 | 
			
		||||
### Description
 | 
			
		||||
This library provides data parallel C++ container classes with internal memory layout
 | 
			
		||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
 | 
			
		||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
 | 
			
		||||
@@ -22,37 +42,75 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
 | 
			
		||||
for most programmers.
 | 
			
		||||
 | 
			
		||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
 | 
			
		||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON on the way).
 | 
			
		||||
Presently SSE4 (128 bit), AVX, AVX2 (256 bit), IMCI and AVX512 (512 bit) targets are supported (ARM NEON and BG/Q QPX on the way).
 | 
			
		||||
 | 
			
		||||
These are presented as 
 | 
			
		||||
 | 
			
		||||
     vRealF, vRealD, vComplexF, vComplexD 
 | 
			
		||||
 | 
			
		||||
internal vector data types. These may be useful in themselves for other programmers.
 | 
			
		||||
The corresponding scalar types are named
 | 
			
		||||
 | 
			
		||||
     RealF, RealD, ComplexF, ComplexD
 | 
			
		||||
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
 | 
			
		||||
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
 | 
			
		||||
 | 
			
		||||
MPI, OpenMP, and SIMD parallelism are present in the library.
 | 
			
		||||
Please see https://arxiv.org/abs/1512.03487 for more detail.
 | 
			
		||||
 | 
			
		||||
   You can give `configure' initial values for configuration parameters
 | 
			
		||||
by setting variables in the command line or in the environment.  Here
 | 
			
		||||
are examples:
 | 
			
		||||
### Installation
 | 
			
		||||
First, start by cloning the repository:
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
 | 
			
		||||
``` bash
 | 
			
		||||
git clone https://github.com/paboyle/Grid.git
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX
 | 
			
		||||
Then enter the cloned directory and set up the build system:
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
 | 
			
		||||
``` bash
 | 
			
		||||
cd Grid
 | 
			
		||||
./bootstrap.sh
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
 | 
			
		||||
     
 | 
			
		||||
Note: Before running configure it could be necessary to execute the script 
 | 
			
		||||
       
 | 
			
		||||
       script/filelist
 | 
			
		||||
Now you can execute the `configure` script to generate makefiles (here from a build directory):
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
mkdir build; cd build
 | 
			
		||||
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
     
 | 
			
		||||
For developers:
 | 
			
		||||
Use reconfigure_script in the scripts/ directory to create the autotools environment 
 | 
			
		||||
where `--enable-precision=` sets the default precision (`single` or `double`),
 | 
			
		||||
`--enable-simd=` sets the SIMD type (see possible values below), `--enable-
 | 
			
		||||
comms=` sets the protocol used for communications (`none`, `mpi`, `mpi-auto` or
 | 
			
		||||
`shmem`), and `<path>` should be replaced by the prefix path where you want to
 | 
			
		||||
install Grid. The `mpi-auto` communication option lets `configure` determine
 | 
			
		||||
automatically how to link to MPI. Other options are available, use `configure
 | 
			
		||||
--help` to display them. Like with any other program using GNU Autotools, the
 | 
			
		||||
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
 | 
			
		||||
customise the build.
 | 
			
		||||
 | 
			
		||||
Finally, you can build and install Grid:
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
make; make install
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
make -C tests/<subdir> tests
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Possible SIMD types
 | 
			
		||||
 | 
			
		||||
The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets:
 | 
			
		||||
 | 
			
		||||
| String      | Description                            |
 | 
			
		||||
| ----------- | -------------------------------------- |
 | 
			
		||||
| `GEN`       | generic portable vector code           |
 | 
			
		||||
| `SSE4`      | SSE 4.2 (128 bit)                      |
 | 
			
		||||
| `AVX`       | AVX (256 bit)                          |
 | 
			
		||||
| `AVXFMA4`   | AVX (256 bit) + FMA                    |
 | 
			
		||||
| `AVX2`      | AVX 2 (256 bit)                        |
 | 
			
		||||
| `AVX512`    | AVX 512 bit                            |
 | 
			
		||||
| `AVX512MIC` | AVX 512 bit for Intel MIC architecture |
 | 
			
		||||
| `IMCI`      | Intel IMCI instructions (512 bit)      |
 | 
			
		||||
 | 
			
		||||
Alternatively, some CPU codenames can be directly used:
 | 
			
		||||
 | 
			
		||||
| String      | Description                            |
 | 
			
		||||
| ----------- | -------------------------------------- |
 | 
			
		||||
| `KNC`       | [Intel Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
 | 
			
		||||
| `KNL`       | [Intel Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
 | 
			
		||||
@@ -1,4 +0,0 @@
 | 
			
		||||
aclocal -I m4
 | 
			
		||||
autoheader -f
 | 
			
		||||
automake -f --add-missing
 | 
			
		||||
autoconf -f
 | 
			
		||||
@@ -194,7 +194,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
    }
 | 
			
		||||
  }  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
 | 
			
		||||
@@ -315,7 +315,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -61,6 +61,8 @@ int main (int argc, char ** argv)
 | 
			
		||||
    QCD::WilsonKernelsStatic::AsmOpt=0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s)  "<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										117
									
								
								benchmarks/Benchmark_wilson_sweep.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										117
									
								
								benchmarks/Benchmark_wilson_sweep.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,117 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_wilson_sweep.cc
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Richard Rollins <rprollins@users.noreply.github.com>
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
// Thin wrapper holding exactly one value of the template type `d`.
template <class d>
struct scal
{
  d internal;
};
 | 
			
		||||
 | 
			
		||||
// The X, Y, Z and T gamma matrix identifiers, stored in that order.
Gamma::GammaMatrix Gmu[] = { Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ, Gamma::GammaT };
 | 
			
		||||
 | 
			
		||||
bool overlapComms = false;
 | 
			
		||||
 | 
			
		||||
void bench_wilson (
 | 
			
		||||
		   LatticeFermion &    src,
 | 
			
		||||
		   LatticeFermion & result,
 | 
			
		||||
		   WilsonFermionR &     Dw,
 | 
			
		||||
		   double const     volume,
 | 
			
		||||
		   int const           dag );
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
 | 
			
		||||
  typename WilsonFermionR::ImplParams params;
 | 
			
		||||
  params.overlapCommsCompute = overlapComms;
 | 
			
		||||
 | 
			
		||||
  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
			
		||||
  std::vector<int> mpi_layout  = GridDefaultMpi();
 | 
			
		||||
  std::vector<int> seeds({1,2,3,4});
 | 
			
		||||
  RealD mass = 0.1;
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
 | 
			
		||||
 | 
			
		||||
  int Lmax = 32;
 | 
			
		||||
  int dmin = 0;
 | 
			
		||||
  if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
 | 
			
		||||
  if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
 | 
			
		||||
  for (int L=8; L<=Lmax; L*=2)
 | 
			
		||||
    {
 | 
			
		||||
      std::vector<int> latt_size = std::vector<int>(4,L);
 | 
			
		||||
      for(int d=4; d>dmin; d--)
 | 
			
		||||
	{
 | 
			
		||||
	  if ( d<=3 ) { latt_size[d] *= 2; }
 | 
			
		||||
 | 
			
		||||
	  std::cout << GridLogMessage;
 | 
			
		||||
	  std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) );
 | 
			
		||||
	  std::cout << latt_size.back() << "\t\t";
 | 
			
		||||
 | 
			
		||||
	  GridCartesian           Grid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
	  GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
	  GridParallelRNG  pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
 | 
			
		||||
	  LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
 | 
			
		||||
	  LatticeFermion    src(&Grid); random(pRNG,src);
 | 
			
		||||
	  LatticeFermion result(&Grid); result=zero;
 | 
			
		||||
 | 
			
		||||
	  double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
 | 
			
		||||
 | 
			
		||||
	  WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
 | 
			
		||||
      
 | 
			
		||||
	  bench_wilson(src,result,Dw,volume,DaggerNo);
 | 
			
		||||
	  bench_wilson(src,result,Dw,volume,DaggerYes);
 | 
			
		||||
	  std::cout << std::endl;
 | 
			
		||||
	}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Apply Dw.Dhop(src,result,dag) a fixed number of times and print the
// achieved rate followed by two tabs (no newline).
//
// src, result : input and output fermion fields handed to Dhop.
// Dw          : operator whose Dhop is timed.
// volume      : lattice site count used to scale the flop estimate.
// dag         : dagger flag forwarded to Dhop.
//
// The printed figure is 1344 * volume * ncall divided by the elapsed
// usecond() interval.
void bench_wilson (
		   LatticeFermion &    src,
		   LatticeFermion & result,
		   WilsonFermionR &     Dw,
		   double const     volume,
		   int const           dag )
{
  const int ncall = 1000;

  const double start = usecond();
  int remaining = ncall;
  while ( remaining-- > 0 )
    {
      Dw.Dhop(src,result,dag);
    }
  const double stop = usecond();

  const double flops = 1344 * volume * ncall;
  std::cout << flops/(stop-start) << "\t\t";
}
 | 
			
		||||
@@ -40,14 +40,20 @@ int main(int argc,char **argv)
 | 
			
		||||
  std::ofstream os("zmm.dat");
 | 
			
		||||
 | 
			
		||||
  os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "====================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking ZMM"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "====================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "Volume \t\t\t\tC++DW/MFLOPs\tASM-DW/MFLOPs\tdiff"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "====================================================================="<<std::endl;
 | 
			
		||||
  for(int L=4;L<=32;L+=4){
 | 
			
		||||
    for(int m=1;m<=2;m++){
 | 
			
		||||
      for(int Ls=8;Ls<=16;Ls+=8){
 | 
			
		||||
	std::vector<int> grid({L,L,m*L,m*L});
 | 
			
		||||
  std::cout << GridLogMessage <<"\t";
 | 
			
		||||
	for(int i=0;i<4;i++) { 
 | 
			
		||||
	  std::cout << grid[i]<<"x";
 | 
			
		||||
	}
 | 
			
		||||
	std::cout << Ls<<std::endl;
 | 
			
		||||
	std::cout << Ls<<"\t\t";
 | 
			
		||||
	bench(os,grid,Ls);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
@@ -104,7 +110,6 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall=50;
 | 
			
		||||
  double t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
@@ -116,7 +121,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
 | 
			
		||||
  double flops=1344*volume/2;
 | 
			
		||||
 | 
			
		||||
  mfc = flops*ncall/(t1-t0);
 | 
			
		||||
  std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s =   "<< mfc<<std::endl;
 | 
			
		||||
  std::cout<<mfc<<"\t\t";
 | 
			
		||||
 | 
			
		||||
  QCD::WilsonKernelsStatic::AsmOpt=1;
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
@@ -125,7 +130,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  mfa = flops*ncall/(t1-t0);
 | 
			
		||||
  std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s =   "<< mfa<<std::endl;
 | 
			
		||||
  std::cout<<mfa<<"\t\t";
 | 
			
		||||
  /*
 | 
			
		||||
  int dag=DaggerNo;
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
@@ -163,8 +168,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
 | 
			
		||||
  //resulta = (-0.5) * resulta;
 | 
			
		||||
 | 
			
		||||
  diff = resulto-resulta;
 | 
			
		||||
  std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
 | 
			
		||||
  std::cout<<std::endl;
 | 
			
		||||
  std::cout<<norm2(diff)<<std::endl;
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,39 +0,0 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_dwf_ntpf Benchmark_dwf_sweep Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson Benchmark_zmm
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_comms_SOURCES=Benchmark_comms.cc
 | 
			
		||||
Benchmark_comms_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_dwf_SOURCES=Benchmark_dwf.cc
 | 
			
		||||
Benchmark_dwf_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_dwf_ntpf_SOURCES=Benchmark_dwf_ntpf.cc
 | 
			
		||||
Benchmark_dwf_ntpf_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_dwf_sweep_SOURCES=Benchmark_dwf_sweep.cc
 | 
			
		||||
Benchmark_dwf_sweep_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
 | 
			
		||||
Benchmark_memory_asynch_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
 | 
			
		||||
Benchmark_memory_bandwidth_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_su3_SOURCES=Benchmark_su3.cc
 | 
			
		||||
Benchmark_su3_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_wilson_SOURCES=Benchmark_wilson.cc
 | 
			
		||||
Benchmark_wilson_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_zmm_SOURCES=Benchmark_zmm.cc
 | 
			
		||||
Benchmark_zmm_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
@@ -1,8 +1 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Test code
 | 
			
		||||
#
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										19
									
								
								bootstrap.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										19
									
								
								bootstrap.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,19 @@
 | 
			
		||||
#!/usr/bin/env bash
#
# Prepare a fresh checkout for configuration:
#   1. download the Eigen tarball and hand it to scripts/update_eigen.sh,
#   2. download the FFTW tarball and hand it to scripts/update_fftw.sh,
#   3. run scripts/filelist to generate the Make.inc files,
#   4. run autoreconf to generate the configure script.
# Run from the top-level source directory (the helper scripts are referenced
# relative to it).

# Stop at the first failing step so that a failed download or helper script is
# not silently followed by the remaining steps.
set -e

EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
FFTW_URL='http://www.fftw.org/fftw-3.3.4.tar.gz'

echo "-- deploying Eigen source..."
wget "${EIGEN_URL}"
./scripts/update_eigen.sh "$(basename "${EIGEN_URL}")"
rm "$(basename "${EIGEN_URL}")"

echo "-- copying fftw prototypes..."
wget "${FFTW_URL}"
./scripts/update_fftw.sh "$(basename "${FFTW_URL}")"
rm "$(basename "${FFTW_URL}")"

echo '-- generating Make.inc files...'
./scripts/filelist
echo '-- generating configure script...'
autoreconf -fvi
 | 
			
		||||
							
								
								
									
										417
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										417
									
								
								configure.ac
									
									
									
									
									
								
							@@ -1,277 +1,293 @@
 | 
			
		||||
#                         -*- Autoconf -*-
 | 
			
		||||
# Process this file with autoconf to produce a configure script.
 | 
			
		||||
#
 | 
			
		||||
# Project Grid package  
 | 
			
		||||
# 
 | 
			
		||||
# Time-stamp: <2015-07-10 17:46:21 neo>
 | 
			
		||||
 | 
			
		||||
AC_PREREQ([2.63])
 | 
			
		||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
 | 
			
		||||
AC_CANONICAL_SYSTEM
 | 
			
		||||
AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid])
 | 
			
		||||
AM_INIT_AUTOMAKE(subdir-objects)
 | 
			
		||||
AC_CONFIG_MACRO_DIR([m4])
 | 
			
		||||
AC_LINK_FILES(lib,include/Grid )
 | 
			
		||||
AC_CONFIG_SRCDIR([lib/Grid.h])
 | 
			
		||||
AC_CONFIG_HEADERS([lib/Config.h])
 | 
			
		||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 | 
			
		||||
 | 
			
		||||
AC_MSG_NOTICE([
 | 
			
		||||
 | 
			
		||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
Configuring $PACKAGE v$VERSION  for $host
 | 
			
		||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
# Checks for programs.
 | 
			
		||||
############### Checks for programs
 | 
			
		||||
AC_LANG(C++)
 | 
			
		||||
CXXFLAGS="-O3 $CXXFLAGS"
 | 
			
		||||
AC_PROG_CXX
 | 
			
		||||
 | 
			
		||||
############ openmp  ###############
 | 
			
		||||
AC_OPENMP
 | 
			
		||||
AC_PROG_RANLIB
 | 
			
		||||
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
 | 
			
		||||
AX_EXT
 | 
			
		||||
 | 
			
		||||
# Checks for libraries.
 | 
			
		||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
 | 
			
		||||
ac_openmp=no
 | 
			
		||||
 | 
			
		||||
# Checks for header files.
 | 
			
		||||
if test "${OPENMP_CXXFLAGS}X" != "X"; then
 | 
			
		||||
ac_openmp=yes
 | 
			
		||||
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
 | 
			
		||||
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
############ libtool ###############
 | 
			
		||||
LT_INIT
 | 
			
		||||
 | 
			
		||||
############### Checks for header files
 | 
			
		||||
AC_CHECK_HEADERS(stdint.h)
 | 
			
		||||
AC_CHECK_HEADERS(mm_malloc.h)
 | 
			
		||||
AC_CHECK_HEADERS(malloc/malloc.h)
 | 
			
		||||
AC_CHECK_HEADERS(malloc.h)
 | 
			
		||||
AC_CHECK_HEADERS(endian.h)
 | 
			
		||||
AC_CHECK_HEADERS(execinfo.h)
 | 
			
		||||
AC_CHECK_HEADERS(gmp.h)
 | 
			
		||||
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
 | 
			
		||||
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
 | 
			
		||||
 | 
			
		||||
# Checks for typedefs, structures, and compiler characteristics.
 | 
			
		||||
############### Checks for typedefs, structures, and compiler characteristics
 | 
			
		||||
AC_TYPE_SIZE_T
 | 
			
		||||
AC_TYPE_UINT32_T
 | 
			
		||||
AC_TYPE_UINT64_T
 | 
			
		||||
 | 
			
		||||
# Checks for library functions.
 | 
			
		||||
echo
 | 
			
		||||
echo Checking libraries 
 | 
			
		||||
echo :::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
############### GMP and MPFR #################
 | 
			
		||||
# --with-gmp=prefix: prepend the given GMP install prefix to the header and
# library search paths. $AM_LDFLAGS has to stay inside the double quotes;
# outside them the shell would treat its expansion as a command to execute
# and the previously accumulated linker flags would not be appended.
AC_ARG_WITH([gmp],
    [AS_HELP_STRING([--with-gmp=prefix],
    [try this for a non-standard install prefix of the GMP library])],
    [AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"]
    [AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"])
 | 
			
		||||
AC_ARG_WITH([mpfr],
 | 
			
		||||
    [AS_HELP_STRING([--with-mpfr=prefix],
 | 
			
		||||
    [try this for a non-standard install prefix of the MPFR library])],
 | 
			
		||||
    [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
 | 
			
		||||
    [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
 | 
			
		||||
 | 
			
		||||
################## lapack ####################
 | 
			
		||||
AC_ARG_ENABLE([lapack],
 | 
			
		||||
    [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], 
 | 
			
		||||
    [ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
 | 
			
		||||
 | 
			
		||||
case ${ac_LAPACK} in
 | 
			
		||||
    no)
 | 
			
		||||
        ;;
 | 
			
		||||
    yes)
 | 
			
		||||
        AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
 | 
			
		||||
    *)
 | 
			
		||||
        AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
 | 
			
		||||
        AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
 | 
			
		||||
        AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
################## FFTW3 ####################
 | 
			
		||||
AC_ARG_WITH([fftw],    
 | 
			
		||||
            [AS_HELP_STRING([--with-fftw=prefix],
 | 
			
		||||
            [try this for a non-standard install prefix of the FFTW3 library])],
 | 
			
		||||
            [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
 | 
			
		||||
            [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# What about the MKL library replacement for fftw3 ?  How do we know if fftw_execute
 | 
			
		||||
# can be found in MKL? 
 | 
			
		||||
#
 | 
			
		||||
AC_CHECK_LIB([fftw3],[fftw_execute],
 | 
			
		||||
	[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])] [ac_fftw=yes],
 | 
			
		||||
        [ac_fftw=no])
 | 
			
		||||
 | 
			
		||||
case ${ac_fftw} in
 | 
			
		||||
    no)
 | 
			
		||||
        echo WARNING libfftw3 not found FFT routines will not work
 | 
			
		||||
        ;;
 | 
			
		||||
    yes)
 | 
			
		||||
        AM_LDFLAGS="$AM_LDFLAGS -lfftw3 -lfftw3f"
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
################ Get compiler information
 | 
			
		||||
AC_LANG([C++])
 | 
			
		||||
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
 | 
			
		||||
AX_COMPILER_VENDOR
 | 
			
		||||
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
 | 
			
		||||
      [vendor of C++ compiler that will compile the code])
 | 
			
		||||
AX_GXX_VERSION
 | 
			
		||||
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
 | 
			
		||||
      [version of g++ that will compile the code])
 | 
			
		||||
 | 
			
		||||
############### Checks for library functions
 | 
			
		||||
CXXFLAGS_CPY=$CXXFLAGS
 | 
			
		||||
LDFLAGS_CPY=$LDFLAGS
 | 
			
		||||
LIBS_CPY=$LIBS
 | 
			
		||||
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
 | 
			
		||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
 | 
			
		||||
AC_CHECK_FUNCS([gettimeofday])
 | 
			
		||||
AC_CHECK_LIB([gmp],[__gmpf_init],
 | 
			
		||||
             [AC_CHECK_LIB([mpfr],[mpfr_init],
 | 
			
		||||
                 [AC_DEFINE([HAVE_LIBMPFR], [1], [Define to 1 if you have the `MPFR' library (-lmpfr).])]
 | 
			
		||||
                 [have_mpfr=true]
 | 
			
		||||
                 [LIBS="$LIBS -lmpfr"],
 | 
			
		||||
                 [AC_MSG_ERROR([MPFR library not found])])]
 | 
			
		||||
   	     [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
 | 
			
		||||
             [have_gmp=true]
 | 
			
		||||
             [LIBS="$LIBS -lgmp"],
 | 
			
		||||
             [AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
 | 
			
		||||
 | 
			
		||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
 | 
			
		||||
#        [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
 | 
			
		||||
#Please install or provide the correct path to your installation
 | 
			
		||||
#Info at: http://www.gmplib.org)])
 | 
			
		||||
if test "${ac_LAPACK}x" != "nox"; then
 | 
			
		||||
    AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
 | 
			
		||||
                 [AC_MSG_ERROR("LAPACK enabled but library not found")])
 | 
			
		||||
fi
 | 
			
		||||
CXXFLAGS=$CXXFLAGS_CPY
 | 
			
		||||
LDFLAGS=$LDFLAGS_CPY
 | 
			
		||||
 | 
			
		||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
 | 
			
		||||
#        [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
 | 
			
		||||
#Please install or provide the correct path to your installation
 | 
			
		||||
#Info at: http://www.mpfr.org/)])
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# SIMD instructions selection
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
 | 
			
		||||
############### SIMD instruction selection
 | 
			
		||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\
 | 
			
		||||
	[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
 | 
			
		||||
	[ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG])
 | 
			
		||||
	[ac_SIMD=${enable_simd}],[ac_SIMD=GEN])
 | 
			
		||||
 | 
			
		||||
supported=no
 | 
			
		||||
 | 
			
		||||
ac_ZMM=no;
 | 
			
		||||
case ${ax_cv_cxx_compiler_vendor} in
 | 
			
		||||
  clang|gnu)
 | 
			
		||||
    case ${ac_SIMD} in
 | 
			
		||||
      SSE4)
 | 
			
		||||
        AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-msse4.2';;
 | 
			
		||||
      AVX)
 | 
			
		||||
        AC_DEFINE([AVX1],[1],[AVX intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-mavx';;
 | 
			
		||||
      AVXFMA4)
 | 
			
		||||
        AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
 | 
			
		||||
        SIMD_FLAGS='-mavx -mfma4';;
 | 
			
		||||
      AVX2)
 | 
			
		||||
        AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-mavx2 -mfma';;
 | 
			
		||||
      AVX512|AVX512MIC|KNL)
 | 
			
		||||
        AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
 | 
			
		||||
      IMCI|KNC)
 | 
			
		||||
        AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      GEN)
 | 
			
		||||
        AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      *)
 | 
			
		||||
        AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
 | 
			
		||||
    esac;;
 | 
			
		||||
  intel)
 | 
			
		||||
    case ${ac_SIMD} in
 | 
			
		||||
      SSE4)
 | 
			
		||||
        AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-msse4.2 -xsse4.2';;
 | 
			
		||||
      AVX)
 | 
			
		||||
        AC_DEFINE([AVX1],[1],[AVX intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-mavx -xavx';;
 | 
			
		||||
      AVXFMA4)
 | 
			
		||||
        AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
 | 
			
		||||
        SIMD_FLAGS='-mavx -xavx -mfma';;
 | 
			
		||||
      AVX2)
 | 
			
		||||
        AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
 | 
			
		||||
      AVX512)
 | 
			
		||||
        AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
 | 
			
		||||
        SIMD_FLAGS='-xcore-avx512';;
 | 
			
		||||
      AVX512MIC|KNL)
 | 
			
		||||
        AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
 | 
			
		||||
        SIMD_FLAGS='-xmic-avx512';;
 | 
			
		||||
      IMCI|KNC)
 | 
			
		||||
        AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      GEN)
 | 
			
		||||
        AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
 | 
			
		||||
        SIMD_FLAGS='';;
 | 
			
		||||
      *)
 | 
			
		||||
        AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);;
 | 
			
		||||
    esac;;
 | 
			
		||||
  *)
 | 
			
		||||
    AC_MSG_WARN([Compiler unknown, using generic vector code])
 | 
			
		||||
    AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);;
 | 
			
		||||
esac
 | 
			
		||||
AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
 | 
			
		||||
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
 | 
			
		||||
 | 
			
		||||
case ${ac_SIMD} in
 | 
			
		||||
     SSE4)
 | 
			
		||||
       echo Configuring for SSE4
 | 
			
		||||
       AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] )
 | 
			
		||||
       if test x"$ax_cv_support_ssse3_ext" = x"yes"; then  dnl minimal support for SSE4
 | 
			
		||||
         supported=yes
 | 
			
		||||
       else
 | 
			
		||||
  	AC_MSG_WARN([Your processor does not support SSE4 instructions])
 | 
			
		||||
       fi
 | 
			
		||||
     ;;
 | 
			
		||||
     AVX)
 | 
			
		||||
       echo Configuring for AVX
 | 
			
		||||
       AC_DEFINE([AVX1],[1],[AVX Intrinsics] )
 | 
			
		||||
       if test x"$ax_cv_support_avx_ext" = x"yes"; then  dnl minimal support for AVX
 | 
			
		||||
       supported=yes			  
 | 
			
		||||
       else
 | 
			
		||||
       	AC_MSG_WARN([Your processor does not support AVX instructions])
 | 
			
		||||
       fi
 | 
			
		||||
     ;;
 | 
			
		||||
     AVXFMA4)
 | 
			
		||||
       echo Configuring for AVX
 | 
			
		||||
       AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
 | 
			
		||||
       if test x"$ax_cv_support_avx_ext" = x"yes"; then  dnl minimal support for AVX
 | 
			
		||||
       supported=yes			  
 | 
			
		||||
       else
 | 
			
		||||
       	AC_MSG_WARN([Your processor does not support AVX instructions])
 | 
			
		||||
       fi
 | 
			
		||||
     ;;
 | 
			
		||||
     AVX2)
 | 
			
		||||
       echo Configuring for AVX2
 | 
			
		||||
       AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
 | 
			
		||||
       if test x"$ax_cv_support_avx2_ext" = x"yes"; then  dnl minimal support for AVX2
 | 
			
		||||
       supported=yes
 | 
			
		||||
       else
 | 
			
		||||
       AC_MSG_WARN([Your processor does not support AVX2 instructions])
 | 
			
		||||
       fi
 | 
			
		||||
     ;;
 | 
			
		||||
     AVX512)
 | 
			
		||||
       echo Configuring for AVX512 
 | 
			
		||||
       AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
       ac_ZMM=yes;
 | 
			
		||||
     ;;
 | 
			
		||||
     IMCI)
 | 
			
		||||
       echo Configuring for IMCI
 | 
			
		||||
       AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
       ac_ZMM=no;
 | 
			
		||||
     ;;
 | 
			
		||||
     NEONv8)
 | 
			
		||||
       echo Configuring for experimental ARMv8a support 
 | 
			
		||||
       AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
     ;;
 | 
			
		||||
     DEBUG)
 | 
			
		||||
       echo Configuring without SIMD support - only for compiler DEBUGGING!
 | 
			
		||||
       AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] )
 | 
			
		||||
      ;;     
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]); 
 | 
			
		||||
     ;;
 | 
			
		||||
  AVX512|AVX512MIC|KNL)
 | 
			
		||||
    AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);;
 | 
			
		||||
  *)
 | 
			
		||||
	;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
case ${ac_ZMM} in
 | 
			
		||||
yes)
 | 
			
		||||
	echo Enabling ZMM source code
 | 
			
		||||
;;
 | 
			
		||||
no)
 | 
			
		||||
	echo Disabling ZMM source code
 | 
			
		||||
;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
# BUILD_ZMM is true only when ac_ZMM is exactly "yes".
# POSIX "=" is used in test; "==" is a bashism that some /bin/sh reject.
AM_CONDITIONAL(BUILD_ZMM,[ test "X${ac_ZMM}X" = "XyesX" ])
 | 
			
		||||
 | 
			
		||||
############### precision selection
 | 
			
		||||
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
 | 
			
		||||
case ${ac_PRECISION} in
 | 
			
		||||
     single)
 | 
			
		||||
       echo default precision is single
 | 
			
		||||
       AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
 | 
			
		||||
     ;;
 | 
			
		||||
     double)
 | 
			
		||||
       echo default precision is double
 | 
			
		||||
       AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Comms selection
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
 | 
			
		||||
############### communication type selection
 | 
			
		||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|shmem],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
 | 
			
		||||
 | 
			
		||||
case ${ac_COMMS} in
 | 
			
		||||
     none)
 | 
			
		||||
       echo Configuring for NO communications
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
 | 
			
		||||
     ;;
 | 
			
		||||
     mpi-auto)
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
 | 
			
		||||
       LX_FIND_MPI
 | 
			
		||||
       if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
 | 
			
		||||
       AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
 | 
			
		||||
       AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
 | 
			
		||||
       AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
 | 
			
		||||
       LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS"
 | 
			
		||||
     ;;
 | 
			
		||||
     mpi)
 | 
			
		||||
       echo Configuring for MPI communications
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
 | 
			
		||||
     ;;
 | 
			
		||||
     shmem)
 | 
			
		||||
       echo Configuring for SHMEM communications
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
# BUILD_COMMS_SHMEM is true only when the comms choice is exactly "shmem".
# POSIX "=" is used in test; "==" is a bashism that some /bin/sh reject.
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" = "XshmemX" ])
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
 | 
			
		||||
# BUILD_COMMS_MPI is true for both the "mpi" and "mpi-auto" comms choices.
# POSIX "=" is used in test; "==" is a bashism that some /bin/sh reject.
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" = "XmpiX" || test "X${ac_COMMS}X" = "Xmpi-autoX" ])
 | 
			
		||||
# BUILD_COMMS_NONE is true only when the comms choice is exactly "none".
# POSIX "=" is used in test; "==" is a bashism that some /bin/sh reject.
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" = "XnoneX" ])
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# RNG selection
 | 
			
		||||
#
 | 
			
		||||
############### RNG selection
 | 
			
		||||
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
 | 
			
		||||
	[Select Random Number Generator to be used])],\
 | 
			
		||||
	[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
 | 
			
		||||
 | 
			
		||||
case ${ac_RNG} in
 | 
			
		||||
     ranlux48)
 | 
			
		||||
     AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
 | 
			
		||||
      AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
 | 
			
		||||
     ;;
 | 
			
		||||
     mt19937)
 | 
			
		||||
     AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
 | 
			
		||||
      AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); 
 | 
			
		||||
      AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# SDE timing mode
 | 
			
		||||
#
 | 
			
		||||
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers=yes|no],\
 | 
			
		||||
############### timer option
 | 
			
		||||
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\
 | 
			
		||||
	[Enable system dependent high res timers])],\
 | 
			
		||||
	[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
 | 
			
		||||
case ${ac_TIMERS} in
 | 
			
		||||
     yes)
 | 
			
		||||
     AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
 | 
			
		||||
      AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
 | 
			
		||||
     ;;
 | 
			
		||||
     no)
 | 
			
		||||
     AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
 | 
			
		||||
      AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); 
 | 
			
		||||
      AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Chroma regression tests
 | 
			
		||||
#
 | 
			
		||||
############### Chroma regression test
 | 
			
		||||
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
 | 
			
		||||
case ${ac_CHROMA} in
 | 
			
		||||
     yes)
 | 
			
		||||
       echo Enabling tests regressing to Chroma
 | 
			
		||||
     ;;
 | 
			
		||||
     no)
 | 
			
		||||
       echo Disabling tests regressing to Chroma
 | 
			
		||||
     yes|no)
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); 
 | 
			
		||||
       AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
# BUILD_CHROMA_REGRESSION is true only when ac_CHROMA is exactly "yes".
# POSIX "=" is used in test; "==" is a bashism that some /bin/sh reject.
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" = "XyesX" ])
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Lapack
 | 
			
		||||
#
 | 
			
		||||
AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Enable lapack yes/no ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
 | 
			
		||||
 | 
			
		||||
case ${ac_LAPACK} in
 | 
			
		||||
     yes)
 | 
			
		||||
       echo Enabling lapack
 | 
			
		||||
     ;;
 | 
			
		||||
     no)
 | 
			
		||||
       echo Disabling lapack
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
       echo Enabling lapack at ${ac_LAPACK}
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ])
 | 
			
		||||
AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ])
 | 
			
		||||
 | 
			
		||||
###################################################################
 | 
			
		||||
# Checks for doxygen support
 | 
			
		||||
# if present enables the "make doxyfile" command
 | 
			
		||||
echo
 | 
			
		||||
echo Checking doxygen support 
 | 
			
		||||
echo :::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
############### Doxygen
 | 
			
		||||
AC_PROG_DOXYGEN
 | 
			
		||||
 | 
			
		||||
if test -n "$DOXYGEN"
 | 
			
		||||
@@ -279,9 +295,14 @@ then
 | 
			
		||||
AC_CONFIG_FILES([docs/doxy.cfg])
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
echo
 | 
			
		||||
echo Creating configuration files
 | 
			
		||||
echo :::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
############### Output
 | 
			
		||||
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
 | 
			
		||||
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
 | 
			
		||||
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
 | 
			
		||||
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
 | 
			
		||||
AC_SUBST([AM_CFLAGS])
 | 
			
		||||
AC_SUBST([AM_CXXFLAGS])
 | 
			
		||||
AC_SUBST([AM_LDFLAGS])
 | 
			
		||||
AC_CONFIG_FILES(Makefile)
 | 
			
		||||
AC_CONFIG_FILES(lib/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/Makefile)
 | 
			
		||||
@@ -293,30 +314,34 @@ AC_CONFIG_FILES(tests/hmc/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/solver/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(tests/qdpxx/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(benchmarks/Makefile)
 | 
			
		||||
AC_CONFIG_FILES(prerequisites/Makefile)
 | 
			
		||||
AC_OUTPUT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
echo "
 | 
			
		||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | 
			
		||||
Summary of configuration for $PACKAGE v$VERSION
 | 
			
		||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
The following features are enabled:
 | 
			
		||||
 | 
			
		||||
----- PLATFORM ----------------------------------------
 | 
			
		||||
- architecture (build)          : $build_cpu
 | 
			
		||||
- os (build)                    : $build_os
 | 
			
		||||
- architecture (target)         : $target_cpu
 | 
			
		||||
- os (target)                   : $target_os
 | 
			
		||||
- compiler vendor               : ${ax_cv_cxx_compiler_vendor}
 | 
			
		||||
- compiler version              : ${ax_cv_gxx_version}
 | 
			
		||||
----- BUILD OPTIONS -----------------------------------
 | 
			
		||||
- SIMD                          : ${ac_SIMD}
 | 
			
		||||
- Threading                     : ${ac_openmp} 
 | 
			
		||||
- Communications type           : ${ac_COMMS}
 | 
			
		||||
- Default precision             : ${ac_PRECISION}
 | 
			
		||||
- RNG choice                    : ${ac_RNG} 
 | 
			
		||||
- GMP                           : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
 | 
			
		||||
- LAPACK                        : ${ac_LAPACK}
 | 
			
		||||
- FFTW                          : ${ac_fftw}
 | 
			
		||||
- build DOXYGEN documentation   : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
 | 
			
		||||
- graphs and diagrams           : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
 | 
			
		||||
- Supported SIMD flags          : $SIMD_FLAGS
 | 
			
		||||
----------------------------------------------------------
 | 
			
		||||
- enabled simd support          : ${ac_SIMD}   (config macro says supported: $supported )
 | 
			
		||||
- communications type           : ${ac_COMMS}
 | 
			
		||||
- default precision             : ${ac_PRECISION}
 | 
			
		||||
- RNG choice                    : ${ac_RNG} 
 | 
			
		||||
- LAPACK	                : ${ac_LAPACK} 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
----- BUILD FLAGS -------------------------------------
 | 
			
		||||
- CXXFLAGS:  "${AM_CXXFLAGS} ${CXXFLAGS}"
 | 
			
		||||
- LDFLAGS:   "${AM_LDFLAGS} ${LDFLAGS}"
 | 
			
		||||
- LIBS:      "${LIBS} "
 | 
			
		||||
-------------------------------------------------------
 | 
			
		||||
"
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										276
									
								
								lib/FFT.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										276
									
								
								lib/FFT.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,276 @@
 | 
			
		||||
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/FFT.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_FFT_H_
 | 
			
		||||
#define _GRID_FFT_H_
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_FFTW	
 | 
			
		||||
#include <fftw3.h>
 | 
			
		||||
#endif
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  // Primary template: deliberately empty, it declares no types or functions.
  template <typename scalar>
  struct FFTW {};
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_FFTW	
 | 
			
		||||
  template<> struct FFTW<ComplexD> {
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    typedef fftw_complex FFTW_scalar;
 | 
			
		||||
    typedef fftw_plan    FFTW_plan;
 | 
			
		||||
 | 
			
		||||
    static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
 | 
			
		||||
					FFTW_scalar *in, const int *inembed,		
 | 
			
		||||
					int istride, int idist,		
 | 
			
		||||
					FFTW_scalar *out, const int *onembed,		
 | 
			
		||||
					int ostride, int odist,		
 | 
			
		||||
					int sign, unsigned flags) {
 | 
			
		||||
      return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
 | 
			
		||||
    }	  
 | 
			
		||||
    
 | 
			
		||||
    static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
 | 
			
		||||
      ::fftw_flops(p,add,mul,fmas);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
 | 
			
		||||
      ::fftw_execute_dft(p,in,out);
 | 
			
		||||
    }
 | 
			
		||||
    inline static void fftw_destroy_plan(const FFTW_plan p) {
 | 
			
		||||
      ::fftw_destroy_plan(p);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<> struct FFTW<ComplexF> {
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    typedef fftwf_complex FFTW_scalar;
 | 
			
		||||
    typedef fftwf_plan    FFTW_plan;
 | 
			
		||||
 | 
			
		||||
    static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
 | 
			
		||||
					FFTW_scalar *in, const int *inembed,		
 | 
			
		||||
					int istride, int idist,		
 | 
			
		||||
					FFTW_scalar *out, const int *onembed,		
 | 
			
		||||
					int ostride, int odist,		
 | 
			
		||||
					int sign, unsigned flags) {
 | 
			
		||||
      return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
 | 
			
		||||
    }	  
 | 
			
		||||
    
 | 
			
		||||
    static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
 | 
			
		||||
      ::fftwf_flops(p,add,mul,fmas);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
 | 
			
		||||
      ::fftwf_execute_dft(p,in,out);
 | 
			
		||||
    }
 | 
			
		||||
    inline static void fftw_destroy_plan(const FFTW_plan p) {
 | 
			
		||||
      ::fftwf_destroy_plan(p);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef FFTW_FORWARD
 | 
			
		||||
#define FFTW_FORWARD (-1)
 | 
			
		||||
#define FFTW_BACKWARD (+1)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  class FFT { 
 | 
			
		||||
  private:
 | 
			
		||||
 | 
			
		||||
    GridCartesian *vgrid;
 | 
			
		||||
    GridCartesian *sgrid;
 | 
			
		||||
 | 
			
		||||
    int Nd;
 | 
			
		||||
    double flops;
 | 
			
		||||
    double flops_call;
 | 
			
		||||
    uint64_t usec;
 | 
			
		||||
 | 
			
		||||
    std::vector<int> dimensions;
 | 
			
		||||
    std::vector<int> processors;
 | 
			
		||||
    std::vector<int> processor_coor;
 | 
			
		||||
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    static const int forward=FFTW_FORWARD;
 | 
			
		||||
    static const int backward=FFTW_BACKWARD;
 | 
			
		||||
 | 
			
		||||
    double Flops(void) {return flops;}
 | 
			
		||||
    double MFlops(void) {return flops/usec;}
 | 
			
		||||
 | 
			
		||||
    FFT ( GridCartesian * grid ) : 
 | 
			
		||||
      vgrid(grid),
 | 
			
		||||
      Nd(grid->_ndimension),
 | 
			
		||||
      dimensions(grid->_fdimensions),
 | 
			
		||||
      processors(grid->_processors),
 | 
			
		||||
      processor_coor(grid->_processor_coor)
 | 
			
		||||
    {
 | 
			
		||||
      flops=0;
 | 
			
		||||
      usec =0;
 | 
			
		||||
      std::vector<int> layout(Nd,1);
 | 
			
		||||
      sgrid = new GridCartesian(dimensions,layout,processors);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    ~FFT ( void)  { 
 | 
			
		||||
      delete sgrid; 
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){
 | 
			
		||||
 | 
			
		||||
      conformable(result._grid,vgrid);
 | 
			
		||||
      conformable(source._grid,vgrid);
 | 
			
		||||
 | 
			
		||||
      int L = vgrid->_ldimensions[dim];
 | 
			
		||||
      int G = vgrid->_fdimensions[dim];
 | 
			
		||||
 | 
			
		||||
      std::vector<int> layout(Nd,1);
 | 
			
		||||
      std::vector<int> pencil_gd(vgrid->_fdimensions);
 | 
			
		||||
 | 
			
		||||
      pencil_gd[dim] = G*processors[dim];    
 | 
			
		||||
 | 
			
		||||
      // Pencil global vol LxLxGxLxL per node
 | 
			
		||||
      GridCartesian pencil_g(pencil_gd,layout,processors);
 | 
			
		||||
 | 
			
		||||
      // Construct pencils
 | 
			
		||||
      typedef typename vobj::scalar_object sobj;
 | 
			
		||||
      typedef typename sobj::scalar_type   scalar;
 | 
			
		||||
 | 
			
		||||
      Lattice<vobj> ssource(vgrid); ssource =source;
 | 
			
		||||
      Lattice<sobj> pgsource(&pencil_g);
 | 
			
		||||
      Lattice<sobj> pgresult(&pencil_g); pgresult=zero;
 | 
			
		||||
 | 
			
		||||
#ifndef HAVE_FFTW	
 | 
			
		||||
      assert(0);
 | 
			
		||||
#else 
 | 
			
		||||
      typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
 | 
			
		||||
      typedef typename FFTW<scalar>::FFTW_plan   FFTW_plan;
 | 
			
		||||
 | 
			
		||||
      {
 | 
			
		||||
	int Ncomp = sizeof(sobj)/sizeof(scalar);
 | 
			
		||||
	int Nlow  = 1;
 | 
			
		||||
	for(int d=0;d<dim;d++){
 | 
			
		||||
	  Nlow*=vgrid->_ldimensions[d];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	int rank = 1;  /* 1d transforms */
 | 
			
		||||
	int n[] = {G}; /* 1d transforms of length G */
 | 
			
		||||
	int howmany = Ncomp;
 | 
			
		||||
	int odist,idist,istride,ostride;
 | 
			
		||||
	idist   = odist   = 1;          /* Distance between consecutive FT's */
 | 
			
		||||
	istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
 | 
			
		||||
	int *inembed = n, *onembed = n;
 | 
			
		||||
 | 
			
		||||
	
 | 
			
		||||
	int sign = FFTW_FORWARD;
 | 
			
		||||
	if (inverse) sign = FFTW_BACKWARD;
 | 
			
		||||
 | 
			
		||||
	FFTW_plan p;
 | 
			
		||||
	{
 | 
			
		||||
	  FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0];
 | 
			
		||||
	  FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0];
 | 
			
		||||
	  p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
 | 
			
		||||
					       in,inembed,
 | 
			
		||||
					       istride,idist,
 | 
			
		||||
					       out,onembed,
 | 
			
		||||
					       ostride, odist,
 | 
			
		||||
					       sign,FFTW_ESTIMATE);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	double add,mul,fma;
 | 
			
		||||
	FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
 | 
			
		||||
	flops_call = add+mul+2.0*fma;
 | 
			
		||||
 | 
			
		||||
	GridStopWatch timer;
 | 
			
		||||
 | 
			
		||||
	// Barrel shift and collect global pencil
 | 
			
		||||
	for(int p=0;p<processors[dim];p++) { 
 | 
			
		||||
 | 
			
		||||
	  for(int idx=0;idx<sgrid->lSites();idx++) { 
 | 
			
		||||
 | 
			
		||||
	    std::vector<int> lcoor(Nd);
 | 
			
		||||
    	    sgrid->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
 | 
			
		||||
	    sobj s;
 | 
			
		||||
 | 
			
		||||
	    peekLocalSite(s,ssource,lcoor);
 | 
			
		||||
 | 
			
		||||
	    lcoor[dim]+=p*L;
 | 
			
		||||
	   
 | 
			
		||||
	    pokeLocalSite(s,pgsource,lcoor);
 | 
			
		||||
	  }
 | 
			
		||||
 | 
			
		||||
	  ssource = Cshift(ssource,dim,L);
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	// Loop over orthog coords
 | 
			
		||||
	int NN=pencil_g.lSites();
 | 
			
		||||
 | 
			
		||||
	GridStopWatch Timer;
 | 
			
		||||
	Timer.Start();
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
	for(int idx=0;idx<NN;idx++) { 
 | 
			
		||||
 | 
			
		||||
	  std::vector<int> lcoor(Nd);
 | 
			
		||||
	  pencil_g.LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
 | 
			
		||||
	  if ( lcoor[dim] == 0 ) {  // restricts loop to plane at lcoor[dim]==0
 | 
			
		||||
	    FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx];
 | 
			
		||||
	    FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx];
 | 
			
		||||
	    FFTW<scalar>::fftw_execute_dft(p,in,out);
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
        Timer.Stop();
 | 
			
		||||
	usec += Timer.useconds();
 | 
			
		||||
	flops+= flops_call*NN;
 | 
			
		||||
 | 
			
		||||
        int pc = processor_coor[dim];
 | 
			
		||||
        for(int idx=0;idx<sgrid->lSites();idx++) { 
 | 
			
		||||
	  std::vector<int> lcoor(Nd);
 | 
			
		||||
	  sgrid->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
	  std::vector<int> gcoor = lcoor;
 | 
			
		||||
	  // extract the result
 | 
			
		||||
	  sobj s;
 | 
			
		||||
	  gcoor[dim] = lcoor[dim]+L*pc;
 | 
			
		||||
	  peekLocalSite(s,pgresult,gcoor);
 | 
			
		||||
	  pokeLocalSite(s,result,lcoor);
 | 
			
		||||
	}
 | 
			
		||||
      	  
 | 
			
		||||
	FFTW<scalar>::fftw_destroy_plan(p);
 | 
			
		||||
      }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -68,6 +68,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#include <Grid/Simd.h>
 | 
			
		||||
#include <Grid/Threads.h>
 | 
			
		||||
#include <Grid/Lexicographic.h>
 | 
			
		||||
#include <Grid/Init.h>
 | 
			
		||||
#include <Grid/Communicator.h> 
 | 
			
		||||
#include <Grid/Cartesian.h>    
 | 
			
		||||
#include <Grid/Tensors.h>      
 | 
			
		||||
@@ -78,7 +79,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#include <Grid/parallelIO/BinaryIO.h>
 | 
			
		||||
#include <Grid/qcd/QCD.h>
 | 
			
		||||
#include <Grid/parallelIO/NerscIO.h>
 | 
			
		||||
#include <Grid/Init.h>
 | 
			
		||||
 | 
			
		||||
#include <Grid/FFT.h>
 | 
			
		||||
 | 
			
		||||
#include <Grid/qcd/hmc/NerscCheckpointer.h>
 | 
			
		||||
#include <Grid/qcd/hmc/HmcRunner.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -153,6 +153,7 @@ void GridParseLayout(char **argv,int argc,
 | 
			
		||||
    assert(ompthreads.size()==1);
 | 
			
		||||
    GridThread::SetThreads(ompthreads[0]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
 | 
			
		||||
    std::vector<int> cores(0);
 | 
			
		||||
    arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
 | 
			
		||||
@@ -203,7 +204,6 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    GridLogConfigure(logstreams);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
 | 
			
		||||
    Grid_debug_handler_init();
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@@ -1,6 +1,3 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include/
 | 
			
		||||
 | 
			
		||||
extra_sources=
 | 
			
		||||
if BUILD_COMMS_MPI
 | 
			
		||||
  extra_sources+=communicator/Communicator_mpi.cc
 | 
			
		||||
@@ -20,16 +17,8 @@ endif
 | 
			
		||||
include Make.inc
 | 
			
		||||
include Eigen.inc
 | 
			
		||||
 | 
			
		||||
lib_LIBRARIES = libGrid.a
 | 
			
		||||
 | 
			
		||||
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
 | 
			
		||||
 | 
			
		||||
fftwdir = $(prefix)/lib/
 | 
			
		||||
fftw_DATA = libfftw3.a
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Include files
 | 
			
		||||
#
 | 
			
		||||
otherincludedir = $(includedir)/Grid
 | 
			
		||||
nobase_otherinclude_HEADERS =$(HFILES) $(EFILES) fftw3.h Config.h
 | 
			
		||||
lib_LTLIBRARIES = libGrid.la
 | 
			
		||||
 | 
			
		||||
libGrid_la_SOURCES             = $(CCFILES) $(extra_sources)
 | 
			
		||||
libGrid_ladir                  = $(pkgincludedir)
 | 
			
		||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
 | 
			
		||||
 
 | 
			
		||||
@@ -18,10 +18,10 @@
 | 
			
		||||
#include <stddef.h>
 | 
			
		||||
#include <Config.h>
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_GMP_H
 | 
			
		||||
#ifdef HAVE_LIBGMP
 | 
			
		||||
#include "bigfloat.h"
 | 
			
		||||
#else
 | 
			
		||||
#include "algorithms/approx/bigfloat_double.h"
 | 
			
		||||
#include "bigfloat_double.h"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
 | 
			
		||||
 
 | 
			
		||||
@@ -127,21 +127,12 @@ class CartesianCommunicator {
 | 
			
		||||
			int recv_from_rank,
 | 
			
		||||
			int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromInit(std::vector<CommsRequest_t> &list,
 | 
			
		||||
			    void *xmit,
 | 
			
		||||
			    int xmit_to_rank,
 | 
			
		||||
			    void *recv,
 | 
			
		||||
			    int recv_from_rank,
 | 
			
		||||
			    int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
			 void *xmit,
 | 
			
		||||
			 int xmit_to_rank,
 | 
			
		||||
			 void *recv,
 | 
			
		||||
			 int recv_from_rank,
 | 
			
		||||
			 int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list);
 | 
			
		||||
    void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -144,28 +144,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
 | 
			
		||||
					       void *xmit,
 | 
			
		||||
					       int dest,
 | 
			
		||||
					       void *recv,
 | 
			
		||||
					       int from,
 | 
			
		||||
					       int bytes)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Request xrq;
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
  int rank = _processor;
 | 
			
		||||
  int ierr;
 | 
			
		||||
  ierr =MPI_Send_init(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
  ierr|=MPI_Recv_init(recv, bytes, MPI_CHAR,dest,_processor,communicator,&rrq);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
  list.push_back(xrq);
 | 
			
		||||
  list.push_back(rrq);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Startall(list.size(),&list[0]);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
@@ -173,12 +151,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<CommsRequest_t> reqs(0);
 | 
			
		||||
  SendToRecvFromInit(reqs,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromBegin(reqs);
 | 
			
		||||
  for(int i=0;i<reqs.size();i++){
 | 
			
		||||
    list.push_back(reqs[i]);
 | 
			
		||||
  }
 | 
			
		||||
  MPI_Request xrq;
 | 
			
		||||
  MPI_Request rrq;
 | 
			
		||||
  int rank = _processor;
 | 
			
		||||
  int ierr;
 | 
			
		||||
  ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
			
		||||
  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
			
		||||
  
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
 | 
			
		||||
  list.push_back(xrq);
 | 
			
		||||
  list.push_back(rrq);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
 
 | 
			
		||||
@@ -84,19 +84,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
 
 | 
			
		||||
@@ -268,10 +268,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  assert(0); //unimplemented
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
@@ -284,15 +280,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
 | 
			
		||||
  //  shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
 | 
			
		||||
  shmem_putmem(recv,xmit,bytes,dest);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0); // Unimplemented
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  //  shmem_quiet();      // I'm done
 | 
			
		||||
 
 | 
			
		||||
@@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
 | 
			
		||||
    assert(ig->_ldimensions[d] == og->_ldimensions[d]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  //PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<ig->lSites();idx++){
 | 
			
		||||
    std::vector<int> lcoor(ni);
 | 
			
		||||
    ig->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
@@ -446,6 +446,79 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  sobj s;
 | 
			
		||||
 | 
			
		||||
  GridBase *lg = lowDim._grid;
 | 
			
		||||
  GridBase *hg = higherDim._grid;
 | 
			
		||||
  int nl = lg->_ndimension;
 | 
			
		||||
  int nh = hg->_ndimension;
 | 
			
		||||
 | 
			
		||||
  assert(nl == nh);
 | 
			
		||||
  assert(orthog<nh);
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
    assert(lg->_processors[d]  == hg->_processors[d]);
 | 
			
		||||
    assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
  //PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<lg->lSites();idx++){
 | 
			
		||||
    std::vector<int> lcoor(nl);
 | 
			
		||||
    std::vector<int> hcoor(nh);
 | 
			
		||||
    lg->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
    if( lcoor[orthog] == slice_lo ) { 
 | 
			
		||||
      hcoor=lcoor;
 | 
			
		||||
      hcoor[orthog] = slice_hi;
 | 
			
		||||
      peekLocalSite(s,lowDim,lcoor);
 | 
			
		||||
      pokeLocalSite(s,higherDim,hcoor);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  sobj s;
 | 
			
		||||
 | 
			
		||||
  GridBase *lg = lowDim._grid;
 | 
			
		||||
  GridBase *hg = higherDim._grid;
 | 
			
		||||
  int nl = lg->_ndimension;
 | 
			
		||||
  int nh = hg->_ndimension;
 | 
			
		||||
 | 
			
		||||
  assert(nl == nh);
 | 
			
		||||
  assert(orthog<nh);
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
    assert(lg->_processors[d]  == hg->_processors[d]);
 | 
			
		||||
    assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
  //PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<lg->lSites();idx++){
 | 
			
		||||
    std::vector<int> lcoor(nl);
 | 
			
		||||
    std::vector<int> hcoor(nh);
 | 
			
		||||
    lg->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
    if( lcoor[orthog] == slice_lo ) { 
 | 
			
		||||
      hcoor=lcoor;
 | 
			
		||||
      hcoor[orthog] = slice_hi;
 | 
			
		||||
      peekLocalSite(s,higherDim,hcoor);
 | 
			
		||||
      pokeLocalSite(s,lowDim,lcoor);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
 | 
			
		||||
{
 | 
			
		||||
 
 | 
			
		||||
@@ -111,6 +111,8 @@ typedef SymanzikGaugeAction<ConjugateGimplD>        ConjugateSymanzikGaugeAction
 | 
			
		||||
#define FermOp4dVecTemplateInstantiate(A) \
 | 
			
		||||
  template class A<WilsonImplF>;		\
 | 
			
		||||
  template class A<WilsonImplD>;		\
 | 
			
		||||
  template class A<ZWilsonImplF>;		\
 | 
			
		||||
  template class A<ZWilsonImplD>;		\
 | 
			
		||||
  template class A<GparityWilsonImplF>;		\
 | 
			
		||||
  template class A<GparityWilsonImplD>;		
 | 
			
		||||
 | 
			
		||||
@@ -120,7 +122,9 @@ typedef SymanzikGaugeAction<ConjugateGimplD>        ConjugateSymanzikGaugeAction
 | 
			
		||||
 | 
			
		||||
#define FermOp5dVecTemplateInstantiate(A) \
 | 
			
		||||
  template class A<DomainWallVec5dImplF>;	\
 | 
			
		||||
  template class A<DomainWallVec5dImplD>;	
 | 
			
		||||
  template class A<DomainWallVec5dImplD>;	\
 | 
			
		||||
  template class A<ZDomainWallVec5dImplF>;	\
 | 
			
		||||
  template class A<ZDomainWallVec5dImplD>;	
 | 
			
		||||
 | 
			
		||||
#define FermOpTemplateInstantiate(A) \
 | 
			
		||||
 FermOp4dVecTemplateInstantiate(A) \
 | 
			
		||||
@@ -143,6 +147,7 @@ typedef SymanzikGaugeAction<ConjugateGimplD>        ConjugateSymanzikGaugeAction
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
 | 
			
		||||
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
 | 
			
		||||
@@ -185,6 +190,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
 | 
			
		||||
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
 | 
			
		||||
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
 | 
			
		||||
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
 | 
			
		||||
 | 
			
		||||
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
 | 
			
		||||
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
 | 
			
		||||
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
 | 
			
		||||
 | 
			
		||||
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
 | 
			
		||||
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
 | 
			
		||||
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
 | 
			
		||||
 
 | 
			
		||||
@@ -54,18 +54,18 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5D   (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag (Ls,1.0);
 | 
			
		||||
  std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass;
 | 
			
		||||
  std::vector<RealD> lower(Ls,-1.0); lower[0]   =mass;
 | 
			
		||||
  std::vector<Coeff_t> diag (Ls,1.0);
 | 
			
		||||
  std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
 | 
			
		||||
  std::vector<Coeff_t> lower(Ls,-1.0); lower[0]   =mass;
 | 
			
		||||
  M5D(psi,chi,chi,lower,diag,upper);
 | 
			
		||||
}
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::Meooe5D    (const FermionField &psi, FermionField &Din)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag = bs;
 | 
			
		||||
  std::vector<RealD> upper= cs;
 | 
			
		||||
  std::vector<RealD> lower= cs; 
 | 
			
		||||
  std::vector<Coeff_t> diag = bs;
 | 
			
		||||
  std::vector<Coeff_t> upper= cs;
 | 
			
		||||
  std::vector<Coeff_t> lower= cs; 
 | 
			
		||||
  upper[Ls-1]=-mass*upper[Ls-1];
 | 
			
		||||
  lower[0]   =-mass*lower[0];
 | 
			
		||||
  M5D(psi,psi,Din,lower,diag,upper);
 | 
			
		||||
@@ -73,9 +73,9 @@ void CayleyFermion5D<Impl>::Meooe5D    (const FermionField &psi, FermionField &D
 | 
			
		||||
template<class Impl> void CayleyFermion5D<Impl>::Meo5D     (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag = beo;
 | 
			
		||||
  std::vector<RealD> upper(Ls);
 | 
			
		||||
  std::vector<RealD> lower(Ls);
 | 
			
		||||
  std::vector<Coeff_t> diag = beo;
 | 
			
		||||
  std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
  std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
  for(int i=0;i<Ls;i++) {
 | 
			
		||||
    upper[i]=-ceo[i];
 | 
			
		||||
    lower[i]=-ceo[i];
 | 
			
		||||
@@ -88,9 +88,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::Mooee       (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag = bee;
 | 
			
		||||
  std::vector<RealD> upper(Ls);
 | 
			
		||||
  std::vector<RealD> lower(Ls);
 | 
			
		||||
  std::vector<Coeff_t> diag = bee;
 | 
			
		||||
  std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
  std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
  for(int i=0;i<Ls;i++) {
 | 
			
		||||
    upper[i]=-cee[i];
 | 
			
		||||
    lower[i]=-cee[i];
 | 
			
		||||
@@ -104,9 +104,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag = bee;
 | 
			
		||||
  std::vector<RealD> upper(Ls);
 | 
			
		||||
  std::vector<RealD> lower(Ls);
 | 
			
		||||
  std::vector<Coeff_t> diag = bee;
 | 
			
		||||
  std::vector<Coeff_t> upper(Ls);
 | 
			
		||||
  std::vector<Coeff_t> lower(Ls);
 | 
			
		||||
 | 
			
		||||
  for (int s=0;s<Ls;s++){
 | 
			
		||||
    // Assemble the 5d matrix
 | 
			
		||||
@@ -129,9 +129,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag(Ls,1.0);
 | 
			
		||||
  std::vector<RealD> upper(Ls,-1.0);
 | 
			
		||||
  std::vector<RealD> lower(Ls,-1.0);
 | 
			
		||||
  std::vector<Coeff_t> diag(Ls,1.0);
 | 
			
		||||
  std::vector<Coeff_t> upper(Ls,-1.0);
 | 
			
		||||
  std::vector<Coeff_t> lower(Ls,-1.0);
 | 
			
		||||
  upper[Ls-1]=-mass*upper[Ls-1];
 | 
			
		||||
  lower[0]   =-mass*lower[0];
 | 
			
		||||
  M5Ddag(psi,chi,chi,lower,diag,upper);
 | 
			
		||||
@@ -141,9 +141,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MeooeDag5D    (const FermionField &psi, FermionField &Din)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  std::vector<RealD> diag =bs;
 | 
			
		||||
  std::vector<RealD> upper=cs;
 | 
			
		||||
  std::vector<RealD> lower=cs;
 | 
			
		||||
  std::vector<Coeff_t> diag =bs;
 | 
			
		||||
  std::vector<Coeff_t> upper=cs;
 | 
			
		||||
  std::vector<Coeff_t> lower=cs;
 | 
			
		||||
  upper[Ls-1]=-mass*upper[Ls-1];
 | 
			
		||||
  lower[0]   =-mass*lower[0];
 | 
			
		||||
  M5Ddag(psi,psi,Din,lower,diag,upper);
 | 
			
		||||
@@ -273,11 +273,21 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
 | 
			
		||||
{
 | 
			
		||||
  SetCoefficientsZolotarev(1.0,zdata,b,c);
 | 
			
		||||
  std::vector<Coeff_t> gamma(this->Ls);
 | 
			
		||||
  for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
 | 
			
		||||
  SetCoefficientsInternal(1.0,gamma,b,c);
 | 
			
		||||
}
 | 
			
		||||
//Zolo
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<Coeff_t> gamma(this->Ls);
 | 
			
		||||
  for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
 | 
			
		||||
  SetCoefficientsInternal(zolo_hi,gamma,b,c);
 | 
			
		||||
}
 | 
			
		||||
//Zolo
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
 | 
			
		||||
@@ -315,7 +325,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
 | 
			
		||||
  double bmc = b-c;
 | 
			
		||||
  for(int i=0; i < Ls; i++){
 | 
			
		||||
    as[i] = 1.0;
 | 
			
		||||
    omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
 | 
			
		||||
    omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
 | 
			
		||||
    bs[i] = 0.5*(bpc/omega[i] + bmc);
 | 
			
		||||
    cs[i] = 0.5*(bpc/omega[i] - bmc);
 | 
			
		||||
  }
 | 
			
		||||
@@ -377,7 +387,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
 | 
			
		||||
  }
 | 
			
		||||
	
 | 
			
		||||
  { 
 | 
			
		||||
    double delta_d=mass*cee[Ls-1];
 | 
			
		||||
    Coeff_t delta_d=mass*cee[Ls-1];
 | 
			
		||||
    for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
 | 
			
		||||
    dee[Ls-1] += delta_d;
 | 
			
		||||
  }  
 | 
			
		||||
 
 | 
			
		||||
@@ -62,16 +62,16 @@ namespace Grid {
 | 
			
		||||
      void M5D(const FermionField &psi,
 | 
			
		||||
	       const FermionField &phi, 
 | 
			
		||||
	       FermionField &chi,
 | 
			
		||||
	       std::vector<RealD> &lower,
 | 
			
		||||
	       std::vector<RealD> &diag,
 | 
			
		||||
	       std::vector<RealD> &upper);
 | 
			
		||||
	       std::vector<Coeff_t> &lower,
 | 
			
		||||
	       std::vector<Coeff_t> &diag,
 | 
			
		||||
	       std::vector<Coeff_t> &upper);
 | 
			
		||||
 | 
			
		||||
      void M5Ddag(const FermionField &psi,
 | 
			
		||||
		  const FermionField &phi, 
 | 
			
		||||
		  FermionField &chi,
 | 
			
		||||
		  std::vector<RealD> &lower,
 | 
			
		||||
		  std::vector<RealD> &diag,
 | 
			
		||||
		  std::vector<RealD> &upper);
 | 
			
		||||
		  std::vector<Coeff_t> &lower,
 | 
			
		||||
		  std::vector<Coeff_t> &diag,
 | 
			
		||||
		  std::vector<Coeff_t> &upper);
 | 
			
		||||
      void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
 | 
			
		||||
 | 
			
		||||
      virtual void   Instantiatable(void)=0;
 | 
			
		||||
@@ -91,23 +91,23 @@ namespace Grid {
 | 
			
		||||
      RealD mass;
 | 
			
		||||
 | 
			
		||||
      // Cayley form Moebius (tanh and zolotarev)
 | 
			
		||||
      std::vector<RealD> omega; 
 | 
			
		||||
      std::vector<RealD> bs;    // S dependent coeffs
 | 
			
		||||
      std::vector<RealD> cs;    
 | 
			
		||||
      std::vector<RealD> as;    
 | 
			
		||||
      std::vector<Coeff_t> omega; 
 | 
			
		||||
      std::vector<Coeff_t> bs;    // S dependent coeffs
 | 
			
		||||
      std::vector<Coeff_t> cs;    
 | 
			
		||||
      std::vector<Coeff_t> as;    
 | 
			
		||||
      // For preconditioning Cayley form
 | 
			
		||||
      std::vector<RealD> bee;    
 | 
			
		||||
      std::vector<RealD> cee;    
 | 
			
		||||
      std::vector<RealD> aee;    
 | 
			
		||||
      std::vector<RealD> beo;    
 | 
			
		||||
      std::vector<RealD> ceo;    
 | 
			
		||||
      std::vector<RealD> aeo;    
 | 
			
		||||
      std::vector<Coeff_t> bee;    
 | 
			
		||||
      std::vector<Coeff_t> cee;    
 | 
			
		||||
      std::vector<Coeff_t> aee;    
 | 
			
		||||
      std::vector<Coeff_t> beo;    
 | 
			
		||||
      std::vector<Coeff_t> ceo;    
 | 
			
		||||
      std::vector<Coeff_t> aeo;    
 | 
			
		||||
      // LDU factorisation of the eeoo matrix
 | 
			
		||||
      std::vector<RealD> lee;    
 | 
			
		||||
      std::vector<RealD> leem;    
 | 
			
		||||
      std::vector<RealD> uee;    
 | 
			
		||||
      std::vector<RealD> ueem;    
 | 
			
		||||
      std::vector<RealD> dee;    
 | 
			
		||||
      std::vector<Coeff_t> lee;    
 | 
			
		||||
      std::vector<Coeff_t> leem;    
 | 
			
		||||
      std::vector<Coeff_t> uee;    
 | 
			
		||||
      std::vector<Coeff_t> ueem;    
 | 
			
		||||
      std::vector<Coeff_t> dee;    
 | 
			
		||||
 | 
			
		||||
      // Constructors
 | 
			
		||||
      CayleyFermion5D(GaugeField &_Umu,
 | 
			
		||||
@@ -117,20 +117,19 @@ namespace Grid {
 | 
			
		||||
		      GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
		      RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    protected:
 | 
			
		||||
      void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
 | 
			
		||||
      void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
 | 
			
		||||
      void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#define INSTANTIATE_DPERP(A)\
 | 
			
		||||
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
 | 
			
		||||
					std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \
 | 
			
		||||
					std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
 | 
			
		||||
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
 | 
			
		||||
					   std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \
 | 
			
		||||
					   std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
 | 
			
		||||
template void CayleyFermion5D< A >::MooeeInv    (const FermionField &psi, FermionField &chi); \
 | 
			
		||||
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -43,9 +43,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
				const FermionField &phi, 
 | 
			
		||||
				FermionField &chi,
 | 
			
		||||
				std::vector<RealD> &lower,
 | 
			
		||||
				std::vector<RealD> &diag,
 | 
			
		||||
				std::vector<RealD> &upper)
 | 
			
		||||
				std::vector<Coeff_t> &lower,
 | 
			
		||||
				std::vector<Coeff_t> &diag,
 | 
			
		||||
				std::vector<Coeff_t> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls =this->Ls;
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
@@ -82,9 +82,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
 | 
			
		||||
				   const FermionField &phi, 
 | 
			
		||||
				   FermionField &chi,
 | 
			
		||||
				   std::vector<RealD> &lower,
 | 
			
		||||
				   std::vector<RealD> &diag,
 | 
			
		||||
				   std::vector<RealD> &upper)
 | 
			
		||||
				   std::vector<Coeff_t> &lower,
 | 
			
		||||
				   std::vector<Coeff_t> &diag,
 | 
			
		||||
				   std::vector<Coeff_t> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls =this->Ls;
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
@@ -204,6 +204,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
  INSTANTIATE_DPERP(WilsonImplD);
 | 
			
		||||
  INSTANTIATE_DPERP(GparityWilsonImplF);
 | 
			
		||||
  INSTANTIATE_DPERP(GparityWilsonImplD);
 | 
			
		||||
  INSTANTIATE_DPERP(ZWilsonImplF);
 | 
			
		||||
  INSTANTIATE_DPERP(ZWilsonImplD);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
 
 | 
			
		||||
@@ -43,9 +43,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
				const FermionField &phi, 
 | 
			
		||||
				FermionField &chi,
 | 
			
		||||
				std::vector<RealD> &lower,
 | 
			
		||||
				std::vector<RealD> &diag,
 | 
			
		||||
				std::vector<RealD> &upper)
 | 
			
		||||
				std::vector<Coeff_t> &lower,
 | 
			
		||||
				std::vector<Coeff_t> &diag,
 | 
			
		||||
				std::vector<Coeff_t> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
@@ -65,9 +65,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
 | 
			
		||||
				   const FermionField &phi, 
 | 
			
		||||
				   FermionField &chi,
 | 
			
		||||
				   std::vector<RealD> &lower,
 | 
			
		||||
				   std::vector<RealD> &diag,
 | 
			
		||||
				   std::vector<RealD> &upper)
 | 
			
		||||
				   std::vector<Coeff_t> &lower,
 | 
			
		||||
				   std::vector<Coeff_t> &diag,
 | 
			
		||||
				   std::vector<Coeff_t> &upper)
 | 
			
		||||
{
 | 
			
		||||
  int Ls=this->Ls;
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
 
 | 
			
		||||
@@ -53,9 +53,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
				const FermionField &phi, 
 | 
			
		||||
				FermionField &chi,
 | 
			
		||||
				std::vector<RealD> &lower,
 | 
			
		||||
				std::vector<RealD> &diag,
 | 
			
		||||
				std::vector<RealD> &upper)
 | 
			
		||||
				std::vector<Coeff_t> &lower,
 | 
			
		||||
				std::vector<Coeff_t> &diag,
 | 
			
		||||
				std::vector<Coeff_t> &upper)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  int Ls   = this->Ls;
 | 
			
		||||
@@ -121,9 +121,9 @@ template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
 | 
			
		||||
				   const FermionField &phi, 
 | 
			
		||||
				   FermionField &chi,
 | 
			
		||||
				   std::vector<RealD> &lower,
 | 
			
		||||
				   std::vector<RealD> &diag,
 | 
			
		||||
				   std::vector<RealD> &upper)
 | 
			
		||||
				   std::vector<Coeff_t> &lower,
 | 
			
		||||
				   std::vector<Coeff_t> &diag,
 | 
			
		||||
				   std::vector<Coeff_t> &upper)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  int Ls   = this->Ls;
 | 
			
		||||
@@ -194,8 +194,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
 | 
			
		||||
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
  
 | 
			
		||||
  Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
  Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
 | 
			
		||||
  Eigen::MatrixXcd Pplus  = Eigen::MatrixXcd::Zero(Ls,Ls);
 | 
			
		||||
  Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
 | 
			
		||||
  
 | 
			
		||||
  for(int s=0;s<Ls;s++){
 | 
			
		||||
    Pplus(s,s) = bee[s];
 | 
			
		||||
@@ -212,8 +212,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
 | 
			
		||||
  Pplus (0,Ls-1) = mass*cee[0];
 | 
			
		||||
  Pminus(Ls-1,0) = mass*cee[Ls-1];
 | 
			
		||||
  
 | 
			
		||||
  Eigen::MatrixXd PplusMat ;
 | 
			
		||||
  Eigen::MatrixXd PminusMat;
 | 
			
		||||
  Eigen::MatrixXcd PplusMat ;
 | 
			
		||||
  Eigen::MatrixXcd PminusMat;
 | 
			
		||||
  
 | 
			
		||||
  if ( inv ) {
 | 
			
		||||
    PplusMat =Pplus.inverse();
 | 
			
		||||
@@ -298,8 +298,12 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_DPERP(DomainWallVec5dImplD);
 | 
			
		||||
INSTANTIATE_DPERP(DomainWallVec5dImplF);
 | 
			
		||||
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
 | 
			
		||||
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
 | 
			
		||||
 | 
			
		||||
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
 | 
			
		||||
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
 | 
			
		||||
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
 | 
			
		||||
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
 
 | 
			
		||||
@@ -103,7 +103,7 @@ namespace Grid {
 | 
			
		||||
    typedef typename Impl::StencilImpl             StencilImpl;		\
 | 
			
		||||
    typedef typename Impl::ImplParams ImplParams;			\
 | 
			
		||||
    typedef typename Impl::Coeff_t       Coeff_t;
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
#define INHERIT_IMPL_TYPES(Base) \
 | 
			
		||||
    INHERIT_GIMPL_TYPES(Base)	 \
 | 
			
		||||
    INHERIT_FIMPL_TYPES(Base)
 | 
			
		||||
@@ -122,9 +122,9 @@ namespace Grid {
 | 
			
		||||
      constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
 | 
			
		||||
 | 
			
		||||
      const bool LsVectorised=false;
 | 
			
		||||
 | 
			
		||||
      typedef _Coeff_t Coeff_t;
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
      
 | 
			
		||||
      template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
 | 
			
		||||
@@ -211,10 +211,9 @@ namespace Grid {
 | 
			
		||||
      
 | 
			
		||||
      static const int Dimension = Nrepresentation;
 | 
			
		||||
      const bool LsVectorised=true;
 | 
			
		||||
      
 | 
			
		||||
      typedef _Coeff_t Coeff_t;      
 | 
			
		||||
      typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
      INHERIT_GIMPL_TYPES(Gimpl);
 | 
			
		||||
      
 | 
			
		||||
      template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
 | 
			
		||||
@@ -312,7 +311,7 @@ namespace Grid {
 | 
			
		||||
      static const int Dimension = Nrepresentation;
 | 
			
		||||
 | 
			
		||||
      const bool LsVectorised=false;
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
      typedef _Coeff_t Coeff_t;
 | 
			
		||||
      typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
 | 
			
		||||
      
 | 
			
		||||
@@ -515,6 +514,7 @@ namespace Grid {
 | 
			
		||||
    typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF;  // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD;  // Double
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    typedef WilsonImpl<vComplex,  FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
 | 
			
		||||
    typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
 | 
			
		||||
    typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
 | 
			
		||||
 
 | 
			
		||||
@@ -38,90 +38,6 @@ int WilsonKernelsStatic::AsmOpt;
 | 
			
		||||
template <class Impl>
 | 
			
		||||
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
template <class Impl>
 | 
			
		||||
typename std::enable_if<Impl::Dimension == 3>::type WilsonKernels<Impl>::DiracOptDhopSite(
 | 
			
		||||
    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
 | 
			
		||||
    int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
  if (AsmOpt) {
 | 
			
		||||
    WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns, in,
 | 
			
		||||
                                             out);
 | 
			
		||||
 | 
			
		||||
  } else {
 | 
			
		||||
#else
 | 
			
		||||
  {
 | 
			
		||||
#endif
 | 
			
		||||
    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        if (HandOpt)
 | 
			
		||||
          WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU, in,
 | 
			
		||||
                                                    out);
 | 
			
		||||
        else
 | 
			
		||||
          WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
 | 
			
		||||
                                                       in, out);
 | 
			
		||||
        sF++;
 | 
			
		||||
      }
 | 
			
		||||
      sU++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
typename std::enable_if<Impl::Dimension != 3>::type WilsonKernels<Impl>::DiracOptDhopSite(
 | 
			
		||||
    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
 | 
			
		||||
    int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
 | 
			
		||||
  for (int site = 0; site < Ns; site++) {
 | 
			
		||||
    for (int s = 0; s < Ls; s++) {
 | 
			
		||||
      WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
 | 
			
		||||
                                                   out);
 | 
			
		||||
      sF++;
 | 
			
		||||
    }
 | 
			
		||||
    sU++;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class Impl> 
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
             std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
             int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out,
 | 
			
		||||
             typename std::enable_if<Impl::Dimension == 3, int>::type = 0)
 | 
			
		||||
{
 | 
			
		||||
  // No asm implementation yet.
 | 
			
		||||
  //  if ( AsmOpt )     WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
  //  else
 | 
			
		||||
  for(int site=0;site<Ns;site++) {
 | 
			
		||||
    for(int s=0;s<Ls;s++) {
 | 
			
		||||
      if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
      else         WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
      sF++;
 | 
			
		||||
    }
 | 
			
		||||
    sU++;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptDhopSiteDag(
 | 
			
		||||
    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
 | 
			
		||||
    int sU, int Ls, int Ns, const FermionField &in, FermionField &out,
 | 
			
		||||
    typename std::enable_if<Impl::Dimension != 3, int>::type = 0) {
 | 
			
		||||
  for (int site = 0; site < Ns; site++) {
 | 
			
		||||
    for (int s = 0; s < Ls; s++) {
 | 
			
		||||
      WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
                                                      in, out);
 | 
			
		||||
      sF++;
 | 
			
		||||
    }
 | 
			
		||||
    sU++;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
// Generic implementation; move to different file?
 | 
			
		||||
////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -33,26 +33,27 @@ directory
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
namespace QCD {
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Helper routines that implement Wilson stencil for a single site.
 | 
			
		||||
// Common to both the WilsonFermion and WilsonFermion5D
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
class WilsonKernelsStatic {
 | 
			
		||||
 public:
 | 
			
		||||
  // S-direction is INNERMOST and takes no part in the parity.
 | 
			
		||||
  static int AsmOpt;   // these are a temporary hack
 | 
			
		||||
  static int HandOpt;  // these are a temporary hack
 | 
			
		||||
};
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Helper routines that implement Wilson stencil for a single site.
 | 
			
		||||
    // Common to both the WilsonFermion and WilsonFermion5D
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    class WilsonKernelsStatic { 
 | 
			
		||||
    public:
 | 
			
		||||
      // S-direction is INNERMOST and takes no part in the parity.
 | 
			
		||||
      static int AsmOpt;  // these are a temporary hack
 | 
			
		||||
      static int HandOpt; // these are a temporary hack
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
 | 
			
		||||
 public:
 | 
			
		||||
  INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
  typedef FermionOperator<Impl> Base;
 | 
			
		||||
    template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic { 
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
     INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
     typedef FermionOperator<Impl> Base;
 | 
			
		||||
     
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
			
		||||
  DiracOptDhopSite(
 | 
			
		||||
@@ -102,35 +103,45 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc== 3 && EnableBool, void>::type
 | 
			
		||||
  typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
 | 
			
		||||
                          void>::type
 | 
			
		||||
  DiracOptDhopSiteDag(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
 | 
			
		||||
    // No asm implementation yet.
 | 
			
		||||
    //  if ( AsmOpt )
 | 
			
		||||
    //  WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
 | 
			
		||||
    //  else
 | 
			
		||||
    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        if (HandOpt)
 | 
			
		||||
          WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
                                                       in, out);
 | 
			
		||||
        else
 | 
			
		||||
          WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
 | 
			
		||||
                                                          sU, in, out);
 | 
			
		||||
        sF++;
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out) {
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
    if (AsmOpt) {
 | 
			
		||||
      WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
 | 
			
		||||
                                                  Ns, in, out);
 | 
			
		||||
    } else {
 | 
			
		||||
#else
 | 
			
		||||
    {
 | 
			
		||||
#endif
 | 
			
		||||
      for (int site = 0; site < Ns; site++) {
 | 
			
		||||
        for (int s = 0; s < Ls; s++) {
 | 
			
		||||
          if (HandOpt)
 | 
			
		||||
            WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
                                                         in, out);
 | 
			
		||||
          else
 | 
			
		||||
            WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
 | 
			
		||||
                                                            sU, in, out);
 | 
			
		||||
          sF++;
 | 
			
		||||
        }
 | 
			
		||||
        sU++;
 | 
			
		||||
      }
 | 
			
		||||
      sU++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <bool EnableBool = true>
 | 
			
		||||
    typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
			
		||||
    DiracOptDhopSiteDag(
 | 
			
		||||
			StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
			std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
			int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
 | 
			
		||||
  typename std::enable_if<
 | 
			
		||||
      (Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
 | 
			
		||||
      void>::type
 | 
			
		||||
  DiracOptDhopSiteDag(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out) {
 | 
			
		||||
    for (int site = 0; site < Ns; site++) {
 | 
			
		||||
      for (int s = 0; s < Ls; s++) {
 | 
			
		||||
        WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
 | 
			
		||||
@@ -140,7 +151,7 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
 | 
			
		||||
      sU++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  void DiracOptDhopDir(
 | 
			
		||||
      StencilImpl &st, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
@@ -165,6 +176,12 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptAsmDhopSiteDag(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
			
		||||
      FermionField &out);
 | 
			
		||||
 | 
			
		||||
  void DiracOptHandDhopSite(
 | 
			
		||||
      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
			
		||||
      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
			
		||||
@@ -177,7 +194,9 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  WilsonKernels(const ImplParams &p = ImplParams());
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -73,12 +73,21 @@ static int signInit = setupSigns();
 | 
			
		||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
 | 
			
		||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
 | 
			
		||||
#define FX(A) WILSONASM_ ##A
 | 
			
		||||
 | 
			
		||||
#undef KERNEL_DAG
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
						     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
						     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
 | 
			
		||||
#define KERNEL_DAG
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
						     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
						     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
 | 
			
		||||
#undef VMOVIDUP
 | 
			
		||||
#undef VMOVRDUP
 | 
			
		||||
#undef MAYBEPERM
 | 
			
		||||
@@ -89,14 +98,25 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd
 | 
			
		||||
#define VMOVIDUP(A,B,C)                                  VBCASTIDUPf(A,B,C)
 | 
			
		||||
#define VMOVRDUP(A,B,C)                                  VBCASTRDUPf(A,B,C)
 | 
			
		||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
 | 
			
		||||
 | 
			
		||||
#undef KERNEL_DAG
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
 | 
			
		||||
#define KERNEL_DAG
 | 
			
		||||
template<>
 | 
			
		||||
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
								   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
			
		||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							      int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);		
 | 
			
		||||
 
 | 
			
		||||
@@ -30,7 +30,11 @@
 | 
			
		||||
  basep = st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
    XP_PROJMEM(base);
 | 
			
		||||
#else 
 | 
			
		||||
    XM_PROJMEM(base);
 | 
			
		||||
#endif
 | 
			
		||||
    MAYBEPERM(PERMUTE_DIR3,perm);
 | 
			
		||||
  } else { 
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
@@ -41,15 +45,22 @@
 | 
			
		||||
    MULT_2SPIN_DIR_PFXP(Xp,basep);
 | 
			
		||||
  }
 | 
			
		||||
  LOAD64(%r10,isigns);
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
  XP_RECON;
 | 
			
		||||
#else
 | 
			
		||||
  XM_RECON;
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Yp
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  basep = st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
    YP_PROJMEM(base);
 | 
			
		||||
#else
 | 
			
		||||
    YM_PROJMEM(base);
 | 
			
		||||
#endif
 | 
			
		||||
    MAYBEPERM(PERMUTE_DIR2,perm);
 | 
			
		||||
  } else { 
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
@@ -60,7 +71,11 @@
 | 
			
		||||
    MULT_2SPIN_DIR_PFYP(Yp,basep);
 | 
			
		||||
  }
 | 
			
		||||
  LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
  YP_RECON_ACCUM;
 | 
			
		||||
#else
 | 
			
		||||
  YM_RECON_ACCUM;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Zp
 | 
			
		||||
@@ -68,7 +83,11 @@
 | 
			
		||||
  basep = st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
    ZP_PROJMEM(base);
 | 
			
		||||
#else
 | 
			
		||||
    ZM_PROJMEM(base);
 | 
			
		||||
#endif
 | 
			
		||||
    MAYBEPERM(PERMUTE_DIR1,perm);
 | 
			
		||||
  } else { 
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
@@ -79,7 +98,11 @@
 | 
			
		||||
    MULT_2SPIN_DIR_PFZP(Zp,basep);
 | 
			
		||||
  }
 | 
			
		||||
  LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
  ZP_RECON_ACCUM;
 | 
			
		||||
#else
 | 
			
		||||
  ZM_RECON_ACCUM;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Tp
 | 
			
		||||
@@ -87,7 +110,11 @@
 | 
			
		||||
  basep = st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
    TP_PROJMEM(base);
 | 
			
		||||
#else
 | 
			
		||||
    TM_PROJMEM(base);
 | 
			
		||||
#endif
 | 
			
		||||
    MAYBEPERM(PERMUTE_DIR0,perm);
 | 
			
		||||
  } else { 
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
@@ -98,7 +125,11 @@
 | 
			
		||||
    MULT_2SPIN_DIR_PFTP(Tp,basep);
 | 
			
		||||
  }
 | 
			
		||||
  LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
  TP_RECON_ACCUM;
 | 
			
		||||
#else
 | 
			
		||||
  TM_RECON_ACCUM;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Xm
 | 
			
		||||
@@ -107,7 +138,11 @@
 | 
			
		||||
  //  basep= st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
    XM_PROJMEM(base);
 | 
			
		||||
#else
 | 
			
		||||
    XP_PROJMEM(base);
 | 
			
		||||
#endif
 | 
			
		||||
    MAYBEPERM(PERMUTE_DIR3,perm);
 | 
			
		||||
  } else { 
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
@@ -118,7 +153,11 @@
 | 
			
		||||
    MULT_2SPIN_DIR_PFXM(Xm,basep);
 | 
			
		||||
  }
 | 
			
		||||
  LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
  XM_RECON_ACCUM;
 | 
			
		||||
#else
 | 
			
		||||
  XP_RECON_ACCUM;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Ym
 | 
			
		||||
@@ -126,7 +165,11 @@
 | 
			
		||||
  basep= st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
    YM_PROJMEM(base);
 | 
			
		||||
#else
 | 
			
		||||
    YP_PROJMEM(base);
 | 
			
		||||
#endif
 | 
			
		||||
    MAYBEPERM(PERMUTE_DIR2,perm);
 | 
			
		||||
  } else { 
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
@@ -137,7 +180,11 @@
 | 
			
		||||
    MULT_2SPIN_DIR_PFYM(Ym,basep);
 | 
			
		||||
  }
 | 
			
		||||
  LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
  YM_RECON_ACCUM;
 | 
			
		||||
#else
 | 
			
		||||
  YP_RECON_ACCUM;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Zm
 | 
			
		||||
@@ -145,7 +192,11 @@
 | 
			
		||||
  basep= st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
    ZM_PROJMEM(base);
 | 
			
		||||
#else
 | 
			
		||||
    ZP_PROJMEM(base);
 | 
			
		||||
#endif
 | 
			
		||||
    MAYBEPERM(PERMUTE_DIR1,perm);
 | 
			
		||||
  } else { 
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
@@ -156,7 +207,11 @@
 | 
			
		||||
    MULT_2SPIN_DIR_PFZM(Zm,basep);
 | 
			
		||||
  }
 | 
			
		||||
  LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
  ZM_RECON_ACCUM;
 | 
			
		||||
#else
 | 
			
		||||
  ZP_RECON_ACCUM;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Tm
 | 
			
		||||
@@ -164,7 +219,11 @@
 | 
			
		||||
  basep= st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  if ( local ) {
 | 
			
		||||
    LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
    TM_PROJMEM(base);
 | 
			
		||||
#else
 | 
			
		||||
    TP_PROJMEM(base);
 | 
			
		||||
#endif
 | 
			
		||||
    MAYBEPERM(PERMUTE_DIR0,perm);
 | 
			
		||||
  } else { 
 | 
			
		||||
    LOAD_CHI(base);
 | 
			
		||||
@@ -175,7 +234,11 @@
 | 
			
		||||
    MULT_2SPIN_DIR_PFTM(Tm,basep);
 | 
			
		||||
  }
 | 
			
		||||
  LOAD64(%r10,isigns);  // times i => shuffle and xor the real part sign bit
 | 
			
		||||
#ifdef KERNEL_DAG
 | 
			
		||||
  TM_RECON_ACCUM;
 | 
			
		||||
#else
 | 
			
		||||
  TP_RECON_ACCUM;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  basep= st.GetPFInfo(nent,plocal); nent++;
 | 
			
		||||
  SAVE_RESULT(base,basep);
 | 
			
		||||
 
 | 
			
		||||
@@ -839,46 +839,23 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
 | 
			
		||||
////////////// Wilson ; uses this implementation /////////////////////
 | 
			
		||||
// Need Nc=3 though //
 | 
			
		||||
 | 
			
		||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
							       int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								  int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
#define INSTANTIATE_THEM(A) \
 | 
			
		||||
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
 | 
			
		||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
 | 
			
		||||
							       int ss,int sU,const FermionField &in, FermionField &out);\
 | 
			
		||||
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
 | 
			
		||||
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
 | 
			
		||||
								  int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								      int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								      int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
									 int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
									 int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								      int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
								      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
								      int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
									 int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
			
		||||
									 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
									 int ss,int sU,const FermionField &in, FermionField &out);
 | 
			
		||||
 | 
			
		||||
INSTANTIATE_THEM(WilsonImplF);
 | 
			
		||||
INSTANTIATE_THEM(WilsonImplD);
 | 
			
		||||
INSTANTIATE_THEM(ZWilsonImplF);
 | 
			
		||||
INSTANTIATE_THEM(ZWilsonImplD);
 | 
			
		||||
INSTANTIATE_THEM(GparityWilsonImplF);
 | 
			
		||||
INSTANTIATE_THEM(GparityWilsonImplD);
 | 
			
		||||
INSTANTIATE_THEM(DomainWallVec5dImplF);
 | 
			
		||||
INSTANTIATE_THEM(DomainWallVec5dImplD);
 | 
			
		||||
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
 | 
			
		||||
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										79
									
								
								lib/qcd/action/fermion/ZMobiusFermion.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								lib/qcd/action/fermion/ZMobiusFermion.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,79 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/qcd/action/fermion/ZMobiusFermion.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_QCD_ZMOBIUS_FERMION_H
 | 
			
		||||
#define  GRID_QCD_ZMOBIUS_FERMION_H
 | 
			
		||||
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
 | 
			
		||||
    template<class Impl>
 | 
			
		||||
    class ZMobiusFermion : public CayleyFermion5D<Impl>
 | 
			
		||||
    {
 | 
			
		||||
    public:
 | 
			
		||||
     INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
    public:
 | 
			
		||||
 | 
			
		||||
      virtual void   Instantiatable(void) {};
 | 
			
		||||
      // Constructors
 | 
			
		||||
      ZMobiusFermion(GaugeField &_Umu,
 | 
			
		||||
		     GridCartesian         &FiveDimGrid,
 | 
			
		||||
		     GridRedBlackCartesian &FiveDimRedBlackGrid,
 | 
			
		||||
		     GridCartesian         &FourDimGrid,
 | 
			
		||||
		     GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
			
		||||
		     RealD _mass,RealD _M5,
 | 
			
		||||
		     std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : 
 | 
			
		||||
      
 | 
			
		||||
      CayleyFermion5D<Impl>(_Umu,
 | 
			
		||||
			    FiveDimGrid,
 | 
			
		||||
			    FiveDimRedBlackGrid,
 | 
			
		||||
			    FourDimGrid,
 | 
			
		||||
			    FourDimRedBlackGrid,_mass,_M5,p)
 | 
			
		||||
 | 
			
		||||
      {
 | 
			
		||||
	RealD eps = 1.0;
 | 
			
		||||
	
 | 
			
		||||
	std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
 | 
			
		||||
	std::vector<Coeff_t> zgamma(this->Ls);
 | 
			
		||||
	for(int s=0;s<this->Ls;s++){
 | 
			
		||||
	  zgamma[s] = gamma[s];
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Call base setter
 | 
			
		||||
	this->SetCoefficientsInternal(1.0,zgamma,b,c);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/simd/Grid_empty.h
 | 
			
		||||
    Source file: ./lib/simd/Grid_generic.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
@@ -26,14 +26,6 @@ Author: neo <cossu@post.kek.jp>
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
/*! @file Grid_sse4.h
 | 
			
		||||
  @brief Empty Optimization libraries for debugging
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-06-09 14:28:02 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
namespace Optimization {
 | 
			
		||||
@@ -38,8 +38,8 @@ directory
 | 
			
		||||
#ifndef GRID_VECTOR_TYPES
 | 
			
		||||
#define GRID_VECTOR_TYPES
 | 
			
		||||
 | 
			
		||||
#ifdef EMPTY_SIMD
 | 
			
		||||
#include "Grid_empty.h"
 | 
			
		||||
#ifdef GENERIC_VEC
 | 
			
		||||
#include "Grid_generic.h"
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
#include "Grid_sse4.h"
 | 
			
		||||
@@ -388,6 +388,12 @@ class Grid_simd {
 | 
			
		||||
 | 
			
		||||
};  // end of Grid_simd class definition
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// permute for ComplexD: copies b into y; the perm argument is not used.
inline void permute(ComplexD &y, ComplexD b, int perm)
{
  y = b;
}
 | 
			
		||||
// permute for ComplexF: copies b into y; the perm argument is not used.
inline void permute(ComplexF &y, ComplexF b, int perm)
{
  y = b;
}
 | 
			
		||||
// permute for RealD: copies b into y; the perm argument is not used.
inline void permute(RealD &y, RealD b, int perm)
{
  y = b;
}
 | 
			
		||||
// permute for RealF: copies b into y; the perm argument is not used.
inline void permute(RealF &y, RealF b, int perm)
{
  y = b;
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////
 | 
			
		||||
// General rotate
 | 
			
		||||
////////////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -67,15 +67,13 @@ template <class scalar>
 | 
			
		||||
struct AsinRealFunctor {
 | 
			
		||||
  scalar operator()(const scalar &a) const { return asin(real(a)); }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class scalar>
 | 
			
		||||
struct LogRealFunctor {
 | 
			
		||||
  scalar operator()(const scalar &a) const { return log(real(a)); }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class scalar>
 | 
			
		||||
struct ExpRealFunctor {
 | 
			
		||||
  scalar operator()(const scalar &a) const { return exp(real(a)); }
 | 
			
		||||
struct ExpFunctor {
 | 
			
		||||
  scalar operator()(const scalar &a) const { return exp(a); }
 | 
			
		||||
};
 | 
			
		||||
template <class scalar>
 | 
			
		||||
struct NotFunctor {
 | 
			
		||||
@@ -85,7 +83,6 @@ template <class scalar>
 | 
			
		||||
struct AbsRealFunctor {
 | 
			
		||||
  scalar operator()(const scalar &a) const { return std::abs(real(a)); }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class scalar>
 | 
			
		||||
struct PowRealFunctor {
 | 
			
		||||
  double y;
 | 
			
		||||
@@ -135,7 +132,6 @@ template <class Scalar>
 | 
			
		||||
inline Scalar rsqrt(const Scalar &r) {
 | 
			
		||||
  return (RSqrtRealFunctor<Scalar>(), r);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class S, class V>
 | 
			
		||||
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
 | 
			
		||||
  return SimdApply(CosRealFunctor<S>(), r);
 | 
			
		||||
@@ -162,7 +158,7 @@ inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
 | 
			
		||||
}
 | 
			
		||||
template <class S, class V>
 | 
			
		||||
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
 | 
			
		||||
  return SimdApply(ExpRealFunctor<S>(), r);
 | 
			
		||||
  return SimdApply(ExpFunctor<S>(), r);
 | 
			
		||||
}
 | 
			
		||||
template <class S, class V>
 | 
			
		||||
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										87
									
								
								m4/ax_compiler_vendor.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								m4/ax_compiler_vendor.m4
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,87 @@
 | 
			
		||||
# ===========================================================================
 | 
			
		||||
#    http://www.gnu.org/software/autoconf-archive/ax_compiler_vendor.html
 | 
			
		||||
# ===========================================================================
 | 
			
		||||
#
 | 
			
		||||
# SYNOPSIS
 | 
			
		||||
#
 | 
			
		||||
#   AX_COMPILER_VENDOR
 | 
			
		||||
#
 | 
			
		||||
# DESCRIPTION
 | 
			
		||||
#
 | 
			
		||||
#   Determine the vendor of the C/C++ compiler, e.g., gnu, intel, ibm, sun,
 | 
			
		||||
#   hp, borland, comeau, dec, cray, kai, lcc, metrowerks, sgi, microsoft,
 | 
			
		||||
#   watcom, etc. The vendor is returned in the cache variable
 | 
			
		||||
#   $ax_cv_c_compiler_vendor for C and $ax_cv_cxx_compiler_vendor for C++.
 | 
			
		||||
#
 | 
			
		||||
# LICENSE
 | 
			
		||||
#
 | 
			
		||||
#   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
 | 
			
		||||
#   Copyright (c) 2008 Matteo Frigo
 | 
			
		||||
#
 | 
			
		||||
#   This program is free software: you can redistribute it and/or modify it
 | 
			
		||||
#   under the terms of the GNU General Public License as published by the
 | 
			
		||||
#   Free Software Foundation, either version 3 of the License, or (at your
 | 
			
		||||
#   option) any later version.
 | 
			
		||||
#
 | 
			
		||||
#   This program is distributed in the hope that it will be useful, but
 | 
			
		||||
#   WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 | 
			
		||||
#   Public License for more details.
 | 
			
		||||
#
 | 
			
		||||
#   You should have received a copy of the GNU General Public License along
 | 
			
		||||
#   with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
#
 | 
			
		||||
#   As a special exception, the respective Autoconf Macro's copyright owner
 | 
			
		||||
#   gives unlimited permission to copy, distribute and modify the configure
 | 
			
		||||
#   scripts that are the output of Autoconf when processing the Macro. You
 | 
			
		||||
#   need not follow the terms of the GNU General Public License when using
 | 
			
		||||
#   or distributing such scripts, even though portions of the text of the
 | 
			
		||||
#   Macro appear in them. The GNU General Public License (GPL) does govern
 | 
			
		||||
#   all other use of the material that constitutes the Autoconf Macro.
 | 
			
		||||
#
 | 
			
		||||
#   This special exception to the GPL applies to versions of the Autoconf
 | 
			
		||||
#   Macro released by the Autoconf Archive. When you make and distribute a
 | 
			
		||||
#   modified version of the Autoconf Macro, you may extend this special
 | 
			
		||||
#   exception to the GPL to apply to your modified version as well.
 | 
			
		||||
 | 
			
		||||
#serial 15
 | 
			
		||||
 | 
			
		||||
AC_DEFUN([AX_COMPILER_VENDOR],
 | 
			
		||||
[AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor,
 | 
			
		||||
  dnl Please add if possible support to ax_compiler_version.m4
 | 
			
		||||
  [# note: don't check for gcc first since some other compilers define __GNUC__
 | 
			
		||||
  vendors="intel:     __ICC,__ECC,__INTEL_COMPILER
 | 
			
		||||
           ibm:       __xlc__,__xlC__,__IBMC__,__IBMCPP__
 | 
			
		||||
           pathscale: __PATHCC__,__PATHSCALE__
 | 
			
		||||
           clang:     __clang__
 | 
			
		||||
           cray:      _CRAYC
 | 
			
		||||
           fujitsu:   __FUJITSU
 | 
			
		||||
           gnu:       __GNUC__
 | 
			
		||||
           sun:       __SUNPRO_C,__SUNPRO_CC
 | 
			
		||||
           hp:        __HP_cc,__HP_aCC
 | 
			
		||||
           dec:       __DECC,__DECCXX,__DECC_VER,__DECCXX_VER
 | 
			
		||||
           borland:   __BORLANDC__,__CODEGEARC__,__TURBOC__
 | 
			
		||||
           comeau:    __COMO__
 | 
			
		||||
           kai:       __KCC
 | 
			
		||||
           lcc:       __LCC__
 | 
			
		||||
           sgi:       __sgi,sgi
 | 
			
		||||
           microsoft: _MSC_VER
 | 
			
		||||
           metrowerks: __MWERKS__
 | 
			
		||||
           watcom:    __WATCOMC__
 | 
			
		||||
           portland:  __PGI
 | 
			
		||||
	   tcc:       __TINYC__
 | 
			
		||||
           unknown:   UNKNOWN"
 | 
			
		||||
  for ventest in $vendors; do
 | 
			
		||||
    case $ventest in
 | 
			
		||||
      *:) vendor=$ventest; continue ;;
 | 
			
		||||
      *)  vencpp="defined("`echo $ventest | sed 's/,/) || defined(/g'`")" ;;
 | 
			
		||||
    esac
 | 
			
		||||
    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[
 | 
			
		||||
      #if !($vencpp)
 | 
			
		||||
        thisisanerror;
 | 
			
		||||
      #endif
 | 
			
		||||
    ])], [break])
 | 
			
		||||
  done
 | 
			
		||||
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=`echo $vendor | cut -d: -f1`
 | 
			
		||||
 ])
 | 
			
		||||
])
 | 
			
		||||
							
								
								
									
										492
									
								
								m4/ax_compiler_version.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										492
									
								
								m4/ax_compiler_version.m4
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,492 @@
 | 
			
		||||
# ===========================================================================
 | 
			
		||||
#    http://www.gnu.org/software/autoconf-archive/ax_compiler_version.html
 | 
			
		||||
# ===========================================================================
 | 
			
		||||
#
 | 
			
		||||
# SYNOPSIS
 | 
			
		||||
#
 | 
			
		||||
#   AX_COMPILER_VERSION
 | 
			
		||||
#
 | 
			
		||||
# DESCRIPTION
 | 
			
		||||
#
 | 
			
		||||
#   This macro retrieves the compiler version and returns it in the cache
 | 
			
		||||
#   variable $ax_cv_c_compiler_version for C and $ax_cv_cxx_compiler_version
 | 
			
		||||
#   for C++.
 | 
			
		||||
#
 | 
			
		||||
#   Version is returned as epoch:major.minor.patchversion
 | 
			
		||||
#
 | 
			
		||||
#   Epoch is used in order to have an increasing version number in case of
 | 
			
		||||
#   marketing change.
 | 
			
		||||
#
 | 
			
		||||
#   Epoch use: * borland compiler use chronologically 0turboc for turboc
 | 
			
		||||
#   era,
 | 
			
		||||
#
 | 
			
		||||
#     1borlanc BORLANC++ before 5, 2cppbuilder for cppbuilder era,
 | 
			
		||||
#     3borlancpp for return of BORLANC++ (after version 5.5),
 | 
			
		||||
#     4cppbuilder for cppbuilder with year version,
 | 
			
		||||
#     and 5xe for XE era.
 | 
			
		||||
#
 | 
			
		||||
#   An empty string is returned otherwise.
 | 
			
		||||
#
 | 
			
		||||
# LICENSE
 | 
			
		||||
#
 | 
			
		||||
#   Copyright (c) 2014 Bastien ROUCARIES <roucaries.bastien+autoconf@gmail.com>
 | 
			
		||||
#
 | 
			
		||||
#   Copying and distribution of this file, with or without modification, are
 | 
			
		||||
#   permitted in any medium without royalty provided the copyright notice
 | 
			
		||||
#   and this notice are preserved. This file is offered as-is, without any
 | 
			
		||||
#   warranty.
 | 
			
		||||
 | 
			
		||||
#serial 4
 | 
			
		||||
 | 
			
		||||
# for intel: splits __INTEL_COMPILER into major (/100), minor ((%100)/10) and patch (%10)
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_INTEL],
 | 
			
		||||
  [ dnl
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
    [__INTEL_COMPILER/100],,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown intel compiler version]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
    [(__INTEL_COMPILER%100)/10],,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown intel compiler version]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
    [(__INTEL_COMPILER%10)],,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown intel compiler version]))
 | 
			
		||||
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
 | 
			
		||||
  ])
 | 
			
		||||
 | 
			
		||||
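# for IBM: when __COMPILER_VER__ is not defined the version is major.minor.patch.build
# decoded from __xlC__ and __xlC_ver__; otherwise it is major.minor.patch from __xlC__ alone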
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_IBM],
 | 
			
		||||
  [ dnl
 | 
			
		||||
  dnl check between z/OS C/C++  and XL C/C++
 | 
			
		||||
  AC_COMPILE_IFELSE([
 | 
			
		||||
    AC_LANG_PROGRAM([],
 | 
			
		||||
      [
 | 
			
		||||
        #if defined(__COMPILER_VER__)
 | 
			
		||||
        choke me;
 | 
			
		||||
        #endif
 | 
			
		||||
      ])],
 | 
			
		||||
    [
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
        [__xlC__/100],,
 | 
			
		||||
      	AC_MSG_FAILURE([[[$0]] unknown IBM compiler major version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
        [__xlC__%100],,
 | 
			
		||||
      	AC_MSG_FAILURE([[[$0]] unknown IBM compiler minor version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
        [__xlC_ver__/0x100],,
 | 
			
		||||
      	AC_MSG_FAILURE([[[$0]] unknown IBM compiler patch version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_build,
 | 
			
		||||
        [__xlC_ver__%0x100],,
 | 
			
		||||
      	AC_MSG_FAILURE([[[$0]] unknown IBM compiler build version]))
 | 
			
		||||
      ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_build"
 | 
			
		||||
    ],
 | 
			
		||||
    [
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
        [__xlC__%1000],,
 | 
			
		||||
      	AC_MSG_FAILURE([[[$0]] unknown IBM compiler patch version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
        [(__xlC__/10000)%10],,
 | 
			
		||||
      	AC_MSG_FAILURE([[[$0]] unknown IBM compiler minor version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
        [(__xlC__/100000)%10],,
 | 
			
		||||
      	AC_MSG_FAILURE([[[$0]] unknown IBM compiler major version]))
 | 
			
		||||
      ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
 | 
			
		||||
    ])
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
# for pathscale: major.minor.patch from __PATHCC__, __PATHCC_MINOR__ and __PATHCC_PATCHLEVEL__
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_PATHSCALE],[
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
    __PATHCC__,,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown pathscale major]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
    __PATHCC_MINOR__,,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown pathscale minor]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
    [__PATHCC_PATCHLEVEL__],,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown pathscale patch level]))
 | 
			
		||||
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
 | 
			
		||||
  ])
 | 
			
		||||
 | 
			
		||||
# for clang: major.minor.patch from __clang_major__, __clang_minor__ and __clang_patchlevel__
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_CLANG],[
  dnl Major and minor are mandatory: configure aborts when either cannot be computed.
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
    __clang_major__,,
    AC_MSG_FAILURE([[[$0]] unknown clang major]))
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
    __clang_minor__,,
    AC_MSG_FAILURE([[[$0]] unknown clang minor]))
  dnl The patch level is optional. The fourth argument of AC_COMPUTE_INT is a
  dnl shell action, so the default has to be written as an assignment; a bare 0
  dnl would be executed as a command instead of providing a default.
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
    [__clang_patchlevel__],,
    [_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch=0])
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
  ])
 | 
			
		||||
 | 
			
		||||
# for crayc: major.minor from _RELEASE and _RELEASE_MINOR
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_CRAY],[
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
    _RELEASE,,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown crayc release]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
    _RELEASE_MINOR,,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown crayc minor]))
 | 
			
		||||
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor"
 | 
			
		||||
  ])
 | 
			
		||||
 | 
			
		||||
# for fujitsu: the version is the integer value of __FCC_VERSION
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_FUJITSU],[
  dnl The cache variable receives the integer value of __FCC_VERSION as-is;
  dnl no major/minor/patch split is attempted. Configure aborts when the
  dnl value cannot be computed.
  AC_COMPUTE_INT(ax_cv_[]_AC_LANG_ABBREV[]_compiler_version,
                 __FCC_VERSION,,
                 AC_MSG_FAILURE([[[$0]] unknown fujitsu release]))
  ])
 | 
			
		||||
 | 
			
		||||
# for GNU: major.minor.patch from __GNUC__, __GNUC_MINOR__ and __GNUC_PATCHLEVEL__
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_GNU],[
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
    __GNUC__,,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown gcc major]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
    __GNUC_MINOR__,,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown gcc minor]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
    [__GNUC_PATCHLEVEL__],,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown gcc patch level]))
 | 
			
		||||
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
 | 
			
		||||
  ])
 | 
			
		||||
 | 
			
		||||
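# for sun: decodes __SUNPRO_CC (or __SUNPRO_C when that is not defined);
# the digit layout used for major/minor differs for values below 0x1000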
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_SUN],[
 | 
			
		||||
  m4_define([_AX_COMPILER_VERSION_SUN_NUMBER],
 | 
			
		||||
            [
 | 
			
		||||
	     #if defined(__SUNPRO_CC)
 | 
			
		||||
	     __SUNPRO_CC
 | 
			
		||||
	     #else
 | 
			
		||||
	     __SUNPRO_C
 | 
			
		||||
	     #endif
 | 
			
		||||
	    ])
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_until59,
 | 
			
		||||
    !!(_AX_COMPILER_VERSION_SUN_NUMBER < 0x1000),,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown sun release version]))
 | 
			
		||||
  AS_IF([test "X$_ax_[]_AC_LANG_ABBREV[]_compiler_version_until59" = X1],
 | 
			
		||||
    [dnl
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
        _AX_COMPILER_VERSION_SUN_NUMBER % 0x10,,
 | 
			
		||||
	AC_MSG_FAILURE([[[$0]] unknown sun patch version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
        (_AX_COMPILER_VERSION_SUN_NUMBER / 0x10) % 0x10,,
 | 
			
		||||
        AC_MSG_FAILURE([[[$0]] unknown sun minor version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
        (_AX_COMPILER_VERSION_SUN_NUMBER / 0x100),,
 | 
			
		||||
        AC_MSG_FAILURE([[[$0]] unknown sun major version]))
 | 
			
		||||
    ],
 | 
			
		||||
    [dnl
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
        _AX_COMPILER_VERSION_SUN_NUMBER % 0x10,,
 | 
			
		||||
        AC_MSG_FAILURE([[[$0]] unknown sun patch version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
        (_AX_COMPILER_VERSION_SUN_NUMBER / 0x100) % 0x100,,
 | 
			
		||||
        AC_MSG_FAILURE([[[$0]] unknown sun minor version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
        (_AX_COMPILER_VERSION_SUN_NUMBER / 0x1000),,
 | 
			
		||||
        AC_MSG_FAILURE([[[$0]] unknown sun major version]))
 | 
			
		||||
    ])
 | 
			
		||||
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_HP],[
 | 
			
		||||
  m4_define([_AX_COMPILER_VERSION_HP_NUMBER],
 | 
			
		||||
            [
 | 
			
		||||
	     #if defined(__HP_cc)
 | 
			
		||||
	     __HP_cc
 | 
			
		||||
	     #else
 | 
			
		||||
	     __HP_aCC
 | 
			
		||||
	     #endif
 | 
			
		||||
	    ])
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_untilA0121,
 | 
			
		||||
    !!(_AX_COMPILER_VERSION_HP_NUMBER <= 1),,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown hp release version]))
 | 
			
		||||
  AS_IF([test "X$_ax_[]_AC_LANG_ABBREV[]_compiler_version_untilA0121" = X1],
 | 
			
		||||
    [dnl By default output last version with this behavior.
 | 
			
		||||
     dnl it is so old
 | 
			
		||||
      ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="01.21.00"
 | 
			
		||||
    ],
 | 
			
		||||
    [dnl
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
        (_AX_COMPILER_VERSION_HP_NUMBER % 100),,
 | 
			
		||||
        AC_MSG_FAILURE([[[$0]] unknown hp release version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
        ((_AX_COMPILER_VERSION_HP_NUMBER / 100)%100),,
 | 
			
		||||
        AC_MSG_FAILURE([[[$0]] unknown hp minor version]))
 | 
			
		||||
      AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
        ((_AX_COMPILER_VERSION_HP_NUMBER / 10000)%100),,
 | 
			
		||||
        AC_MSG_FAILURE([[[$0]] unknown hp major version]))
 | 
			
		||||
      ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
 | 
			
		||||
    ])
 | 
			
		||||
])
 | 
			
		||||
 | 
			
		||||
AC_DEFUN([_AX_COMPILER_VERSION_DEC],[dnl
 | 
			
		||||
  m4_define([_AX_COMPILER_VERSION_DEC_NUMBER],
 | 
			
		||||
            [
 | 
			
		||||
	     #if defined(__DECC_VER)
 | 
			
		||||
	     __DECC_VER
 | 
			
		||||
	     #else
 | 
			
		||||
	     __DECCXX_VER
 | 
			
		||||
	     #endif
 | 
			
		||||
	    ])
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
 | 
			
		||||
    (_AX_COMPILER_VERSION_DEC_NUMBER % 10000),,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown dec release version]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
 | 
			
		||||
    ((_AX_COMPILER_VERSION_DEC_NUMBER / 100000UL)%100),,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown dec minor version]))
 | 
			
		||||
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
 | 
			
		||||
    ((_AX_COMPILER_VERSION_DEC_NUMBER / 10000000UL)%100),,
 | 
			
		||||
    AC_MSG_FAILURE([[[$0]] unknown dec major version]))
 | 
			
		||||
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
 | 
			
		||||
  ])
 | 
			
		||||
 | 
			
		||||
# borland
# A test program using __TURBOC__ is compiled first.  When it compiles the
# Turbo C number is decoded; otherwise the __BORLANDC__ or __CODEGEARC__ value
# is mapped to a version string through a lookup table.  The result strings
# keep their historical spelling because callers may compare against them.
AC_DEFUN([_AX_COMPILER_VERSION_BORLAND],[dnl
  m4_define([_AX_COMPILER_VERSION_TURBOC_NUMBER],
            [
             #if defined(__TURBOC__)
             __TURBOC__
             #else
             choke me
             #endif
            ])
  m4_define([_AX_COMPILER_VERSION_BORLANDC_NUMBER],
            [
             #if defined(__BORLANDC__)
             __BORLANDC__
             #else
             __CODEGEARC__
             #endif
            ])
  AC_COMPILE_IFELSE(
    [AC_LANG_PROGRAM(,
      _AX_COMPILER_VERSION_TURBOC_NUMBER)],
    [dnl TURBOC
      AC_COMPUTE_INT(
        _ax_[]_AC_LANG_ABBREV[]_compiler_version_turboc_raw,
        _AX_COMPILER_VERSION_TURBOC_NUMBER,,
        AC_MSG_FAILURE([[[$0]] unknown turboc version]))
      AS_IF(
        [test $_ax_[]_AC_LANG_ABBREV[]_compiler_version_turboc_raw -lt 661 || test $_ax_[]_AC_LANG_ABBREV[]_compiler_version_turboc_raw -gt 1023],
        [dnl compute normal version: low byte is minor and next byte is major
          AC_COMPUTE_INT(
            _ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
            _AX_COMPILER_VERSION_TURBOC_NUMBER % 0x100,,
            AC_MSG_FAILURE([[[$0]] unknown turboc minor version]))
          AC_COMPUTE_INT(
            _ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
            (_AX_COMPILER_VERSION_TURBOC_NUMBER/0x100)%0x100,,
            AC_MSG_FAILURE([[[$0]] unknown turboc major version]))
          ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="0turboc:$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor"],
        [dnl special version: raw values 661 to 1023 use a fixed table
          AS_CASE([$_ax_[]_AC_LANG_ABBREV[]_compiler_version_turboc_raw],
            [661],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="0turboc:1.00"],
            [662],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="0turboc:1.01"],
            [663],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="0turboc:2.00"],
            [
            AC_MSG_WARN([[[$0]] unknown turboc version between 0x295 and 0x400 please report bug])
            ax_cv_[]_AC_LANG_ABBREV[]_compiler_version=""
            ])
        ])
    ],
    [dnl borlandc
      AC_COMPUTE_INT(
        _ax_[]_AC_LANG_ABBREV[]_compiler_version_borlandc_raw,
        _AX_COMPILER_VERSION_BORLANDC_NUMBER,,
        AC_MSG_FAILURE([[[$0]] unknown borlandc version]))
      AS_CASE([$_ax_[]_AC_LANG_ABBREV[]_compiler_version_borlandc_raw],
        dnl BORLANC++ before 5.5
        [512],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:2.00"],
        [1024],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:3.00"],
        [1040],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:3.1"],
        [1106],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:4.0"],
        [1280],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:5.0"],
        [1312],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:5.02"],
        dnl C++ Builder era
        [1328],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="2cppbuilder:3.0"],
        [1344],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="2cppbuilder:4.0"],
        dnl BORLANC++ after 5.5
        [1360],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="3borlancpp:5.5"],
        [1361],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="3borlancpp:5.51"],
        [1378],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="3borlancpp:5.6.4"],
        dnl C++ Builder with year number
        [1392],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="4cppbuilder:2006"],
        [1424],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="4cppbuilder:2007"],
        [1555],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="4cppbuilder:2009"],
        [1569],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="4cppbuilder:2010"],
        dnl XE version
        [1584],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="5xe"],
        [1600],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="5xe:2"],
        [1616],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="5xe:3"],
        [1632],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="5xe:4"],
        [
        AC_MSG_WARN([[[$0]] unknown borlandc compiler version $_ax_[]_AC_LANG_ABBREV[]_compiler_version_borlandc_raw please report bug])
        ax_cv_[]_AC_LANG_ABBREV[]_compiler_version=""
        ])
    ])
  ])
 | 
			
		||||
 | 
			
		||||
# COMO
# Splits __COMO_VERSION__ into major.minor using decimal arithmetic.
AC_DEFUN([_AX_COMPILER_VERSION_COMEAU],[dnl
  dnl Minor is the number modulo 100.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
    [__COMO_VERSION__%100],,
    AC_MSG_FAILURE([[[$0]] unknown comeau compiler minor version]))
  dnl Major is the single decimal digit above the minor pair.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
    [(__COMO_VERSION__/100)%10],,
    AC_MSG_FAILURE([[[$0]] unknown comeau compiler major version]))
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor"
])
 | 
			
		||||
 | 
			
		||||
# KAI
# Splits __KCC_VERSION into major.minor.patch using decimal arithmetic.
AC_DEFUN([_AX_COMPILER_VERSION_KAI],[
  dnl Patch is the number modulo 100.
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
    [__KCC_VERSION%100],,
    AC_MSG_FAILURE([[[$0]] unknown kai compiler patch version]))
  dnl Minor is the decimal digit above the patch pair.
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
    [(__KCC_VERSION/100)%10],,
    AC_MSG_FAILURE([[[$0]] unknown kai compiler minor version]))
  dnl Major is the next decimal digit.
  AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
    [(__KCC_VERSION/1000)%10],,
    AC_MSG_FAILURE([[[$0]] unknown kai compiler major version]))
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
  ])
 | 
			
		||||
 | 
			
		||||
dnl LCC
 | 
			
		||||
dnl LCC does not output version...
 | 
			
		||||
 | 
			
		||||
# SGI
# Splits the SGI version number into three single decimal digits.
AC_DEFUN([_AX_COMPILER_VERSION_SGI],[dnl
  dnl Use _COMPILER_VERSION when defined and _SGI_COMPILER_VERSION otherwise.
  m4_define([_AX_COMPILER_VERSION_SGI_NUMBER],
    [
    #if defined(_COMPILER_VERSION)
    _COMPILER_VERSION
    #else
    _SGI_COMPILER_VERSION
    #endif
    ])
  dnl Lowest digit: patch.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
    [_AX_COMPILER_VERSION_SGI_NUMBER%10],,
    AC_MSG_FAILURE([[[$0]] unknown SGI compiler patch version]))
  dnl Second digit: minor.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
    [(_AX_COMPILER_VERSION_SGI_NUMBER/10)%10],,
    AC_MSG_FAILURE([[[$0]] unknown SGI compiler minor version]))
  dnl Third digit: major.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
    [(_AX_COMPILER_VERSION_SGI_NUMBER/100)%10],,
    AC_MSG_FAILURE([[[$0]] unknown SGI compiler major version]))
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
 | 
			
		||||
 | 
			
		||||
# microsoft
# Builds major.minor.patch.build from _MSC_VER, _MSC_FULL_VER and _MSC_BUILD.
AC_DEFUN([_AX_COMPILER_VERSION_MICROSOFT],[dnl
  dnl Minor and major are the two decimal digit pairs of _MSC_VER.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
    _MSC_VER%100,,
    AC_MSG_FAILURE([[[$0]] unknown microsoft compiler minor version]))
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
    (_MSC_VER/100)%100,,
    AC_MSG_FAILURE([[[$0]] unknown microsoft compiler major version]))
  dnl Defaults that the version specific checks below may override.
  _ax_[]_AC_LANG_ABBREV[]_compiler_version_patch=0
  _ax_[]_AC_LANG_ABBREV[]_compiler_version_build=0
  # major 12 - version 6: patch falls back to 0 when it cannot be computed
  AS_IF([test "X$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major" = "X12"],
    [AC_COMPUTE_INT(
       _ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
       _MSC_FULL_VER%1000,,
       _ax_[]_AC_LANG_ABBREV[]_compiler_version_patch=0)])
  # major 13 - version 7: patch is mandatory
  AS_IF([test "X$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major" = "X13"],
    [AC_COMPUTE_INT(
       _ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
       _MSC_FULL_VER%1000,,
       AC_MSG_FAILURE([[[$0]] unknown microsoft compiler patch version]))
    ])
  # major 14 and later: patch uses four decimal digits
  AS_IF([test $_ax_[]_AC_LANG_ABBREV[]_compiler_version_major -ge 14],
    [AC_COMPUTE_INT(
       _ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
       _MSC_FULL_VER%10000,,
       AC_MSG_FAILURE([[[$0]] unknown microsoft compiler patch version]))
    ])
  # major 15 and later: a build number is read as well
  AS_IF([test $_ax_[]_AC_LANG_ABBREV[]_compiler_version_major -ge 15],
    [AC_COMPUTE_INT(
       _ax_[]_AC_LANG_ABBREV[]_compiler_version_build,
       _MSC_BUILD,,
       AC_MSG_FAILURE([[[$0]] unknown microsoft compiler build version]))
    ])
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_build"
])
 | 
			
		||||
 | 
			
		||||
# for metrowerks
# Splits __MWERKS__ into major.minor.patch using hexadecimal fields.
AC_DEFUN([_AX_COMPILER_VERSION_METROWERKS],[dnl
  dnl Patch is the low byte.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
    __MWERKS__%0x100,,
    AC_MSG_FAILURE([[[$0]] unknown metrowerks compiler patch version]))
  dnl Minor is the hex digit above the patch byte.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
    (__MWERKS__/0x100)%0x10,,
    AC_MSG_FAILURE([[[$0]] unknown metrowerks compiler minor version]))
  dnl Major is the next hex digit.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
    (__MWERKS__/0x1000)%0x10,,
    AC_MSG_FAILURE([[[$0]] unknown metrowerks compiler major version]))
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
 | 
			
		||||
 | 
			
		||||
# for watcom
# Splits __WATCOMC__ into major.minor using two decimal digit pairs.
AC_DEFUN([_AX_COMPILER_VERSION_WATCOM],[dnl
  dnl Minor is the number modulo 100.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
    __WATCOMC__%100,,
    AC_MSG_FAILURE([[[$0]] unknown watcom compiler minor version]))
  dnl Major is the digit pair above it.
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
    (__WATCOMC__/100)%100,,
    AC_MSG_FAILURE([[[$0]] unknown watcom compiler major version]))
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor"
])
 | 
			
		||||
 | 
			
		||||
# for PGI
# Reads the three PGI version macros directly; no arithmetic is needed.
AC_DEFUN([_AX_COMPILER_VERSION_PORTLAND],[dnl
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
    __PGIC__,,
    AC_MSG_FAILURE([[[$0]] unknown pgi major]))
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
    __PGIC_MINOR__,,
    AC_MSG_FAILURE([[[$0]] unknown pgi minor]))
  AC_COMPUTE_INT(
    _ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
    [__PGIC_PATCHLEVEL__],,
    AC_MSG_FAILURE([[[$0]] unknown pgi patch level]))
  dnl Publish the result as major.minor.patch.
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
 | 
			
		||||
 | 
			
		||||
# tcc
# Extracts the dotted version number from the banner printed by tcc -v.
AC_DEFUN([_AX_COMPILER_VERSION_TCC],[dnl
  dnl NOTE review: this runs the literal tcc command rather than the configured compiler - confirm intended.
  dnl The sed script keeps only the digits and dots that follow the word version.
  ax_cv_[]_AC_LANG_ABBREV[]_compiler_version=[`tcc -v | $SED 's/^[ ]*tcc[ ]\+version[ ]\+\([0-9.]\+\).*/\1/g'`]
])
 | 
			
		||||
# main entry point
# Dispatches on the cached compiler vendor to the matching vendor macro and
# caches the outcome in ax_cv_<lang>_compiler_version.  Vendors without a
# matching entry yield an empty version string.
AC_DEFUN([AX_COMPILER_VERSION],[dnl
  AC_REQUIRE([AX_COMPILER_VENDOR])
  AC_REQUIRE([AC_PROG_SED])
  AC_CACHE_CHECK([for _AC_LANG compiler version],
    ax_cv_[]_AC_LANG_ABBREV[]_compiler_version,
    [dnl
      AS_CASE([$ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor],
        [intel],      [_AX_COMPILER_VERSION_INTEL],
        [ibm],        [_AX_COMPILER_VERSION_IBM],
        [pathscale],  [_AX_COMPILER_VERSION_PATHSCALE],
        [clang],      [_AX_COMPILER_VERSION_CLANG],
        [cray],       [_AX_COMPILER_VERSION_CRAY],
        [fujitsu],    [_AX_COMPILER_VERSION_FUJITSU],
        [gnu],        [_AX_COMPILER_VERSION_GNU],
        [sun],        [_AX_COMPILER_VERSION_SUN],
        [hp],         [_AX_COMPILER_VERSION_HP],
        [dec],        [_AX_COMPILER_VERSION_DEC],
        [borland],    [_AX_COMPILER_VERSION_BORLAND],
        [comeau],     [_AX_COMPILER_VERSION_COMEAU],
        [kai],        [_AX_COMPILER_VERSION_KAI],
        [sgi],        [_AX_COMPILER_VERSION_SGI],
        [microsoft],  [_AX_COMPILER_VERSION_MICROSOFT],
        [metrowerks], [_AX_COMPILER_VERSION_METROWERKS],
        [watcom],     [_AX_COMPILER_VERSION_WATCOM],
        [portland],   [_AX_COMPILER_VERSION_PORTLAND],
        [tcc],        [_AX_COMPILER_VERSION_TCC],
        [ax_cv_[]_AC_LANG_ABBREV[]_compiler_version=""])
    ])
])
 | 
			
		||||
							
								
								
									
										34
									
								
								m4/ax_gcc_option.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								m4/ax_gcc_option.m4
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,34 @@
 | 
			
		||||
# AX_GCC_OPTION(option, extra-flags, test-program, if-accepted, if-rejected)
# Checks whether the C compiler, when it is gcc, accepts the given option.
#   argument 1: option under test
#   argument 2: extra flags placed before the option on the command line
#   argument 3: source of the test program; a trivial main is used when empty
#   argument 4: shell code run when the compile command succeeds
#   argument 5: shell code run when the compile command fails
# When the compiler is not gcc neither action runs.
AC_DEFUN([AX_GCC_OPTION], [
AC_REQUIRE([AC_PROG_CC])

AC_MSG_CHECKING([if gcc accepts $1 option])

AS_IF([ test "x$GCC" = "xyes" ],[
AS_IF([ test -z "$3" ],[
ax_gcc_option_test="int main()
{
return 0;
}"
],[
ax_gcc_option_test="$3"
])

# Dump the test program to file
cat <<EOF > conftest.c
$ax_gcc_option_test
EOF

# Dump back the file to the log, useful for debugging purposes
AC_TRY_COMMAND(cat conftest.c 1>&AS_MESSAGE_LOG_FD)

AS_IF([ AC_TRY_COMMAND($CC $2 $1 -c conftest.c 1>&AS_MESSAGE_LOG_FD) ],[
# Remove the probe files before running the caller supplied action
rm -f conftest.c conftest.$ac_objext
AC_MSG_RESULT([yes])
$4
],[
# Remove the probe files before running the caller supplied action
rm -f conftest.c conftest.$ac_objext
AC_MSG_RESULT([no])
$5
])
],[
AC_MSG_RESULT([no gcc available])
])
])
 | 
			
		||||
							
								
								
									
										64
									
								
								m4/ax_gcc_version.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								m4/ax_gcc_version.m4
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,64 @@
 | 
			
		||||
# ===========================================================================
 | 
			
		||||
#      http://www.gnu.org/software/autoconf-archive/ax_gcc_version.html
 | 
			
		||||
# ===========================================================================
 | 
			
		||||
#
 | 
			
		||||
# OBSOLETE MACRO
 | 
			
		||||
#
 | 
			
		||||
#   Use AX_COMPILER_VERSION instead
 | 
			
		||||
#
 | 
			
		||||
# SYNOPSIS
 | 
			
		||||
#
 | 
			
		||||
#   AX_GCC_VERSION
 | 
			
		||||
#
 | 
			
		||||
# DESCRIPTION
 | 
			
		||||
#
 | 
			
		||||
#   This macro retrieves the gcc version and returns it in the GCC_VERSION
 | 
			
		||||
#   variable if available, an empty string otherwise.
 | 
			
		||||
#
 | 
			
		||||
# LICENSE
 | 
			
		||||
#
 | 
			
		||||
#   Copyright (c) 2009 Francesco Salvestrini <salvestrini@users.sourceforge.net>
 | 
			
		||||
#
 | 
			
		||||
#   This program is free software; you can redistribute it and/or modify it
 | 
			
		||||
#   under the terms of the GNU General Public License as published by the
 | 
			
		||||
#   Free Software Foundation; either version 2 of the License, or (at your
 | 
			
		||||
#   option) any later version.
 | 
			
		||||
#
 | 
			
		||||
#   This program is distributed in the hope that it will be useful, but
 | 
			
		||||
#   WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 | 
			
		||||
#   Public License for more details.
 | 
			
		||||
#
 | 
			
		||||
#   You should have received a copy of the GNU General Public License along
 | 
			
		||||
#   with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
#
 | 
			
		||||
#   As a special exception, the respective Autoconf Macro's copyright owner
 | 
			
		||||
#   gives unlimited permission to copy, distribute and modify the configure
 | 
			
		||||
#   scripts that are the output of Autoconf when processing the Macro. You
 | 
			
		||||
#   need not follow the terms of the GNU General Public License when using
 | 
			
		||||
#   or distributing such scripts, even though portions of the text of the
 | 
			
		||||
#   Macro appear in them. The GNU General Public License (GPL) does govern
 | 
			
		||||
#   all other use of the material that constitutes the Autoconf Macro.
 | 
			
		||||
#
 | 
			
		||||
#   This special exception to the GPL applies to versions of the Autoconf
 | 
			
		||||
#   Macro released by the Autoconf Archive. When you make and distribute a
 | 
			
		||||
#   modified version of the Autoconf Macro, you may extend this special
 | 
			
		||||
#   exception to the GPL to apply to your modified version as well.
 | 
			
		||||
 | 
			
		||||
#serial 10
 | 
			
		||||
 | 
			
		||||
AC_DEFUN([AX_GCC_VERSION], [dnl
  AC_OBSOLETE([$0], [;please use AX_COMPILER_VERSION instead])
  dnl NOTE review: required macros are normally expanded ahead of the requiring macro body so the language push and pop below may not affect them - confirm.
  AC_LANG_PUSH([C])
  AC_REQUIRE([AX_COMPILER_VENDOR])
  AC_REQUIRE([AX_COMPILER_VERSION])
  AC_LANG_POP([C])
  dnl Both outputs start empty and are filled only for the gnu vendor.
  ax_cv_gcc_version=""
  GCC_VERSION=""
  AS_IF([test "X$ax_cv_c_compiler_vendor" = "Xgnu"],
    [ax_cv_gcc_version=$ax_cv_c_compiler_version
     GCC_VERSION=$ax_cv_gcc_version])
  AC_SUBST([GCC_VERSION])
])
 | 
			
		||||
							
								
								
									
										67
									
								
								m4/ax_gxx_version.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								m4/ax_gxx_version.m4
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,67 @@
 | 
			
		||||
# ===========================================================================
 | 
			
		||||
#             http://autoconf-archive.cryp.to/ax_gxx_version.html
 | 
			
		||||
# ===========================================================================
 | 
			
		||||
#
 | 
			
		||||
# SYNOPSIS
 | 
			
		||||
#
 | 
			
		||||
#   AX_GXX_VERSION
 | 
			
		||||
#
 | 
			
		||||
# DESCRIPTION
 | 
			
		||||
#
 | 
			
		||||
#   This macro retrieves the g++ version and returns it in the GXX_VERSION
 | 
			
		||||
#   variable if available, an empty string otherwise.
 | 
			
		||||
#
 | 
			
		||||
# LAST MODIFICATION
 | 
			
		||||
#
 | 
			
		||||
#   2008-04-12
 | 
			
		||||
#
 | 
			
		||||
# COPYLEFT
 | 
			
		||||
#
 | 
			
		||||
#   Copyright (c) 2008 Francesco Salvestrini <salvestrini@users.sourceforge.net>
 | 
			
		||||
#
 | 
			
		||||
#   This program is free software; you can redistribute it and/or modify it
 | 
			
		||||
#   under the terms of the GNU General Public License as published by the
 | 
			
		||||
#   Free Software Foundation; either version 2 of the License, or (at your
 | 
			
		||||
#   option) any later version.
 | 
			
		||||
#
 | 
			
		||||
#   This program is distributed in the hope that it will be useful, but
 | 
			
		||||
#   WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 | 
			
		||||
#   Public License for more details.
 | 
			
		||||
#
 | 
			
		||||
#   You should have received a copy of the GNU General Public License along
 | 
			
		||||
#   with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
#
 | 
			
		||||
#   As a special exception, the respective Autoconf Macro's copyright owner
 | 
			
		||||
#   gives unlimited permission to copy, distribute and modify the configure
 | 
			
		||||
#   scripts that are the output of Autoconf when processing the Macro. You
 | 
			
		||||
#   need not follow the terms of the GNU General Public License when using
 | 
			
		||||
#   or distributing such scripts, even though portions of the text of the
 | 
			
		||||
#   Macro appear in them. The GNU General Public License (GPL) does govern
 | 
			
		||||
#   all other use of the material that constitutes the Autoconf Macro.
 | 
			
		||||
#
 | 
			
		||||
#   This special exception to the GPL applies to versions of the Autoconf
 | 
			
		||||
#   Macro released by the Autoconf Macro Archive. When you make and
 | 
			
		||||
#   distribute a modified version of the Autoconf Macro, you may extend this
 | 
			
		||||
#   special exception to the GPL to apply to your modified version as well.
 | 
			
		||||
 | 
			
		||||
AC_DEFUN([AX_GXX_VERSION], [
  dnl GXX_VERSION stays empty unless the C++ compiler is g++ and reports a version.
  GXX_VERSION=""
  dnl Record whether the -dumpversion probe succeeded.
  AX_GCC_OPTION([-dumpversion],[],[],[
    ax_gcc_version_option=yes
  ],[
    ax_gcc_version_option=no
  ])
  AS_IF([test "x$GXX" = "xyes"],[
    dnl Read the same variable the probe sets and compare like with like so a rejected option really skips the query.
    AS_IF([test "x$ax_gcc_version_option" != "xno"],[
      AC_CACHE_CHECK([gxx version],[ax_cv_gxx_version],[
        ax_cv_gxx_version="`$CXX -dumpversion`"
      ])
      GXX_VERSION=$ax_cv_gxx_version
    ])
  ])
  AC_SUBST([GXX_VERSION])
])
 | 
			
		||||
							
								
								
									
										203
									
								
								m4/lx_find_mpi.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										203
									
								
								m4/lx_find_mpi.m4
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,203 @@
 | 
			
		||||
#################################################################################################
 | 
			
		||||
# Copyright (c) 2010, Lawrence Livermore National Security, LLC.
 | 
			
		||||
# Produced at the Lawrence Livermore National Laboratory
 | 
			
		||||
# Written by Todd Gamblin, tgamblin@llnl.gov.
 | 
			
		||||
# LLNL-CODE-417602
 | 
			
		||||
# All rights reserved.
 | 
			
		||||
#
 | 
			
		||||
# This file is part of Libra. For details, see http://github.com/tgamblin/libra.
 | 
			
		||||
# Please also read the LICENSE file for further information.
 | 
			
		||||
#
 | 
			
		||||
# Redistribution and use in source and binary forms, with or without modification, are
 | 
			
		||||
# permitted provided that the following conditions are met:
 | 
			
		||||
#
 | 
			
		||||
#  * Redistributions of source code must retain the above copyright notice, this list of
 | 
			
		||||
#    conditions and the disclaimer below.
 | 
			
		||||
#  * Redistributions in binary form must reproduce the above copyright notice, this list of
 | 
			
		||||
#    conditions and the disclaimer (as noted below) in the documentation and/or other materials
 | 
			
		||||
#    provided with the distribution.
 | 
			
		||||
#  * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse
 | 
			
		||||
#    or promote products derived from this software without specific prior written permission.
 | 
			
		||||
#
 | 
			
		||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 | 
			
		||||
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 | 
			
		||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 | 
			
		||||
# LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE
 | 
			
		||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 | 
			
		||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
			
		||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 | 
			
		||||
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 | 
			
		||||
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#################################################################################################
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# LX_FIND_MPI()
 | 
			
		||||
#  ------------------------------------------------------------------------
 | 
			
		||||
# This macro finds an MPI compiler and extracts includes and libraries from
 | 
			
		||||
# it for use in automake projects.  The script exports the following variables:
 | 
			
		||||
#
 | 
			
		||||
# AC_DEFINE variables:
 | 
			
		||||
#     HAVE_MPI         AC_DEFINE'd to 1 if we found MPI
 | 
			
		||||
#
 | 
			
		||||
# AC_SUBST variables:
 | 
			
		||||
#     MPICC            Name of MPI compiler
 | 
			
		||||
#     MPI_CFLAGS       Includes and defines for MPI C compilation
 | 
			
		||||
#     MPI_CLDFLAGS     Libraries and library paths for linking MPI C programs
 | 
			
		||||
#
 | 
			
		||||
#     MPICXX           Name of MPI C++ compiler
 | 
			
		||||
#     MPI_CXXFLAGS     Includes and defines for MPI C++ compilation
 | 
			
		||||
#     MPI_CXXLDFLAGS   Libraries and library paths for linking MPI C++ programs
 | 
			
		||||
#
 | 
			
		||||
#     MPIF77           Name of MPI Fortran 77 compiler
 | 
			
		||||
#     MPI_F77FLAGS     Includes and defines for MPI Fortran 77 compilation
 | 
			
		||||
#     MPI_F77LDFLAGS   Libraries and library paths for linking MPI Fortran 77 programs
 | 
			
		||||
#
 | 
			
		||||
#     MPIFC            Name of MPI Fortran compiler
 | 
			
		||||
#     MPI_FFLAGS       Includes and defines for MPI Fortran compilation
 | 
			
		||||
#     MPI_FLDFLAGS     Libraries and library paths for linking MPI Fortran programs
 | 
			
		||||
#
 | 
			
		||||
# Shell variables output by this macro:
 | 
			
		||||
#     have_C_mpi       'yes' if we found MPI for C, 'no' otherwise
 | 
			
		||||
#     have_CXX_mpi     'yes' if we found MPI for C++, 'no' otherwise
 | 
			
		||||
#     have_F77_mpi     'yes' if we found MPI for F77, 'no' otherwise
 | 
			
		||||
#     have_F_mpi       'yes' if we found MPI for Fortran, 'no' otherwise
 | 
			
		||||
#
 | 
			
		||||
AC_DEFUN([LX_FIND_MPI],
[
  AC_LANG_CASE(
    [C], [
      dnl A preset MPICC is queried as is; otherwise a list of wrapper names is searched.
      AC_REQUIRE([AC_PROG_CC])
      if test -n "$MPICC"; then
        LX_QUERY_MPI_COMPILER(MPICC, [$MPICC], C)
      else
        LX_QUERY_MPI_COMPILER(MPICC, [mpicc mpiicc mpixlc mpipgcc], C)
      fi
    ],
    [C++], [
      dnl Same scheme for the C++ wrapper.
      AC_REQUIRE([AC_PROG_CXX])
      if test -n "$MPICXX"; then
        LX_QUERY_MPI_COMPILER(MPICXX, [$MPICXX], CXX)
      else
        LX_QUERY_MPI_COMPILER(MPICXX, [mpicxx mpiCC mpic++ mpig++ mpiicpc mpipgCC mpixlC], CXX)
      fi
    ],
    [F77], [
      dnl Same scheme for the Fortran 77 wrapper.
      AC_REQUIRE([AC_PROG_F77])
      if test -n "$MPIF77"; then
        LX_QUERY_MPI_COMPILER(MPIF77, [$MPIF77], F77)
      else
        LX_QUERY_MPI_COMPILER(MPIF77, [mpif77 mpiifort mpixlf77 mpixlf77_r], F77)
      fi
    ],
    [Fortran], [
      dnl Same scheme for the Fortran wrapper with the candidate names grouped in shell variables.
      AC_REQUIRE([AC_PROG_FC])
      if test -n "$MPIFC"; then
        LX_QUERY_MPI_COMPILER(MPIFC, [$MPIFC], F)
      else
        mpi_default_fc="mpif95 mpif90 mpigfortran mpif2003"
        mpi_intel_fc="mpiifort"
        mpi_xl_fc="mpixlf95 mpixlf95_r mpixlf90 mpixlf90_r mpixlf2003 mpixlf2003_r"
        mpi_pg_fc="mpipgf95 mpipgf90"
        LX_QUERY_MPI_COMPILER(MPIFC, [$mpi_default_fc $mpi_intel_fc $mpi_xl_fc $mpi_pg_fc], F)
      fi
    ])
])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# LX_QUERY_MPI_COMPILER([compiler-var-name], [compiler-names], [output-var-prefix])
 | 
			
		||||
#  ------------------------------------------------------------------------
 | 
			
		||||
# AC_SUBST variables:
 | 
			
		||||
#     MPI_<prefix>FLAGS       Includes and defines for MPI compilation
 | 
			
		||||
#     MPI_<prefix>LDFLAGS     Libraries and library paths for linking MPI C programs
 | 
			
		||||
#
 | 
			
		||||
# Shell variables output by this macro:
 | 
			
		||||
#     found_mpi_flags         'yes' if we were able to get flags, 'no' otherwise
 | 
			
		||||
#
 | 
			
		||||
AC_DEFUN([LX_QUERY_MPI_COMPILER],
[
     # This macro is expanded once per language, and all expansions share the
     # same scratch shell variables.  Clear them first so that flags queried
     # from a previous compiler wrapper can never leak into this one.
     lx_mpi_compile_line=
     lx_mpi_link_line=
     lx_mpi_command_line=

     # Try to find a working MPI compiler from the supplied names
     AC_PATH_PROGS($1, [$2], [not-found])

     # Figure out what the compiler responds to to get it to show us the compile
     # and link lines.  The options are probed in this order:
     #     -showme:compile and -showme:link
     #     -showme
     #     -compile-info and -link-info
     #     -show
     # After this part of the macro, we'll have a valid lx_mpi_command_line
     # unless none of the probes succeeded.
     printf "checking whether $$1 responds to '-showme:compile'... "
     lx_mpi_compile_line=`$$1 -showme:compile 2>/dev/null`
     if [[ "$?" -eq 0 ]]; then
         echo yes
         lx_mpi_link_line=`$$1 -showme:link 2>/dev/null`
     else
         echo no
         printf "checking whether $$1 responds to '-showme'... "
         lx_mpi_command_line=`$$1 -showme 2>/dev/null`
         if [[ "$?" -ne 0 ]]; then
             echo no
             printf "checking whether $$1 responds to '-compile-info'... "
             lx_mpi_compile_line=`$$1 -compile-info 2>/dev/null`
             if [[ "$?" -eq 0 ]]; then
                 echo yes
                 lx_mpi_link_line=`$$1 -link-info 2>/dev/null`
             else
                 echo no
                 printf "checking whether $$1 responds to '-show'... "
                 lx_mpi_command_line=`$$1 -show 2>/dev/null`
                 if [[ "$?" -eq 0 ]]; then
                     echo yes
                 else
                     echo no
                 fi
             fi
         else
             echo yes
         fi
     fi

     # When the wrapper reported compile and link lines separately, join them
     # into the single command line that is parsed below.
     if [[ ! -z "$lx_mpi_compile_line" -a ! -z "$lx_mpi_link_line" ]]; then
         lx_mpi_command_line="$lx_mpi_compile_line $lx_mpi_link_line"
     fi

     if [[ ! -z "$lx_mpi_command_line" ]]; then
         # Now extract the different parts of the MPI command line.  Do these separately in case we need to
         # parse them all out in future versions of this macro.
         lx_mpi_defines=`    echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-D\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
         lx_mpi_includes=`   echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-I\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
         lx_mpi_link_paths=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-L\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
         lx_mpi_libs=`       echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-l\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
         lx_mpi_link_args=`  echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-Wl,\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`

         # Create variables and clean up newlines and multiple spaces.
         # The last sed uses "two spaces followed by star" because a plus sign
         # is a literal character in a basic regular expression and would
         # leave runs of spaces untouched.
         MPI_$3FLAGS="$lx_mpi_defines $lx_mpi_includes"
         MPI_$3LDFLAGS="$lx_mpi_link_paths $lx_mpi_libs $lx_mpi_link_args"
         MPI_$3FLAGS=`  echo "$MPI_$3FLAGS"   | tr '\n' ' ' | sed 's/^[[ \t]]*//;s/[[ \t]]*$//' | sed 's/  */ /g'`
         MPI_$3LDFLAGS=`echo "$MPI_$3LDFLAGS" | tr '\n' ' ' | sed 's/^[[ \t]]*//;s/[[ \t]]*$//' | sed 's/  */ /g'`

         # Temporarily point the link test at the extracted MPI flags; the
         # caller's values are restored after the test.
         OLD_CPPFLAGS=$CPPFLAGS
         OLD_LIBS=$LIBS
         CPPFLAGS=$MPI_$3FLAGS
         LIBS=$MPI_$3LDFLAGS

         AC_TRY_LINK([#include <mpi.h>],
                     [int rank, size;
                      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
                      MPI_Comm_size(MPI_COMM_WORLD, &size);],
                     [# Add a define for testing at compile time.
                      AC_DEFINE([HAVE_MPI], [1], [Define to 1 if you have MPI libs and headers.])
                      have_$3_mpi='yes'],
                     [# zero out mpi flags so we don't link against the faulty library.
                      MPI_$3FLAGS=""
                      MPI_$3LDFLAGS=""
                      have_$3_mpi='no'])

         # AC_SUBST everything.
         AC_SUBST($1)
         AC_SUBST(MPI_$3FLAGS)
         AC_SUBST(MPI_$3LDFLAGS)

         LIBS=$OLD_LIBS
         CPPFLAGS=$OLD_CPPFLAGS
     else
         echo Unable to find suitable MPI Compiler. Try setting $1.
         have_$3_mpi='no'
     fi
])
 | 
			
		||||
@@ -1,38 +0,0 @@
 | 
			
		||||
FFTFLAGS=$(filter-out -std=c++11, $(CXXFLAGS) )
 | 
			
		||||
 | 
			
		||||
EIGENVER=3.2.8
 | 
			
		||||
EIGEN=eigen$(EIGENVER)
 | 
			
		||||
EIGENTAR=$(EIGEN).tar.bz2
 | 
			
		||||
EIGENURL=https://bitbucket.org/eigen/eigen/get/$(EIGENVER).tar.bz2
 | 
			
		||||
 | 
			
		||||
FFTWVER=3.3.4
 | 
			
		||||
FFTW=fftw-$(FFTWVER)
 | 
			
		||||
FFTWTAR=fftw-$(FFTWVER).tar.gz
 | 
			
		||||
FFTWURL=http://www.fftw.org/$(FFTWTAR)
 | 
			
		||||
 | 
			
		||||
all: Eigen FFTW headerlist
 | 
			
		||||
 | 
			
		||||
$(top_srcdir)/prerequisites/$(EIGENTAR): 
 | 
			
		||||
	curl -v $(EIGENURL) -o $(top_srcdir)/prerequisites/$(EIGENTAR)
 | 
			
		||||
 | 
			
		||||
$(top_srcdir)/prerequisites/$(FFTWTAR): 
 | 
			
		||||
	curl -v $(FFTWURL) -o $(top_srcdir)/prerequisites/$(FFTWTAR)
 | 
			
		||||
 | 
			
		||||
Eigen:  $(top_srcdir)/prerequisites/$(EIGENTAR)
 | 
			
		||||
	tar xvf $(top_srcdir)/prerequisites/$(EIGENTAR)
 | 
			
		||||
	- rm -rf  $(top_srcdir)/lib/Eigen
 | 
			
		||||
	mv eigen-eigen*/Eigen .
 | 
			
		||||
	echo EFILES=`find Eigen -type f -name '*.h' ` > $(top_srcdir)/lib/Eigen.inc
 | 
			
		||||
	mv Eigen $(top_srcdir)/lib/
 | 
			
		||||
	touch Eigen
 | 
			
		||||
 | 
			
		||||
FFTW: $(top_srcdir)/prerequisites/$(FFTWTAR)
 | 
			
		||||
	tar xvf $(top_srcdir)/prerequisites/$(FFTWTAR)
 | 
			
		||||
	cd $(FFTW) &&	./configure --prefix=@abs_top_builddir@/prerequisites/fftwinstall CFLAGS="$(FFTFLAGS)" CC=$(CC) LDFLAGS="$(LDFLAGS)" && make all install
 | 
			
		||||
	cp -pr fftwinstall/include/fftw3.h ../include/Grid/
 | 
			
		||||
	cp -pr fftwinstall/lib/libfftw3.a  ../lib/
 | 
			
		||||
	touch FFTW
 | 
			
		||||
 | 
			
		||||
headerlist:
 | 
			
		||||
	cd $(top_srcdir) && ./scripts/filelist
 | 
			
		||||
	touch headerlist
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								prerequisites/fftw-3.3.4.tar.gz
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								prerequisites/fftw-3.3.4.tar.gz
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -1,4 +0,0 @@
 | 
			
		||||
 | 
			
		||||
HFILES=
 | 
			
		||||
 | 
			
		||||
CCFILES=
 | 
			
		||||
@@ -1,63 +1,47 @@
 | 
			
		||||
#!/bin/bash
 | 
			
		||||
#!/usr/bin/env bash
 | 
			
		||||
 | 
			
		||||
home=`pwd`
 | 
			
		||||
 
 | 
			
		||||
 | 
			
		||||
# library Make.inc
 | 
			
		||||
cd $home/lib
 | 
			
		||||
HFILES=`find . -type f -name '*.h' -not -path '*/Old/*'`
 | 
			
		||||
HFILES="$HFILES Config.h"
 | 
			
		||||
HFILES=`find . -type f -name '*.h' -not -path '*/Old/*' -not -path '*/Eigen/*'`
 | 
			
		||||
HFILES="$HFILES"
 | 
			
		||||
CCFILES=`find . -type f -name '*.cc' -not  -name '*ommunicator*.cc'`
 | 
			
		||||
echo> Make.inc
 | 
			
		||||
echo HFILES=$HFILES >> Make.inc
 | 
			
		||||
echo HFILES=$HFILES > Make.inc
 | 
			
		||||
echo >> Make.inc
 | 
			
		||||
echo CCFILES=$CCFILES >> Make.inc
 | 
			
		||||
 | 
			
		||||
# tests Make.inc
 | 
			
		||||
cd $home/tests
 | 
			
		||||
 | 
			
		||||
dirs=`find . -type d `
 | 
			
		||||
 | 
			
		||||
for subdir in $dirs
 | 
			
		||||
do
 | 
			
		||||
 | 
			
		||||
pwd
 | 
			
		||||
echo subdir is $subdir of $dirs
 | 
			
		||||
 | 
			
		||||
cd $home/tests/$subdir
 | 
			
		||||
 | 
			
		||||
TESTS=`ls T*.cc`
 | 
			
		||||
TESTLIST=`echo ${TESTS} | sed s/.cc//g `
 | 
			
		||||
 | 
			
		||||
echo > Make.inc
 | 
			
		||||
echo bin_PROGRAMS += ${TESTLIST} | sed s/Test_zmm//g >> Make.inc
 | 
			
		||||
echo >> Make.inc
 | 
			
		||||
for f in $TESTS
 | 
			
		||||
do
 | 
			
		||||
BNAME=`basename $f .cc`
 | 
			
		||||
echo >> Make.inc
 | 
			
		||||
echo ${BNAME}_SOURCES=$f  >> Make.inc
 | 
			
		||||
echo ${BNAME}_LDADD=-lGrid>> Make.inc
 | 
			
		||||
echo >> Make.inc
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
for subdir in $dirs; do
 | 
			
		||||
	cd $home/tests/$subdir
 | 
			
		||||
	TESTS=`ls T*.cc`
 | 
			
		||||
	TESTLIST=`echo ${TESTS} | sed s/.cc//g `
 | 
			
		||||
	PREF=`[ $subdir = '.' ] && echo noinst || echo EXTRA`
 | 
			
		||||
	echo "tests: ${TESTLIST}" > Make.inc
 | 
			
		||||
	echo ${PREF}_PROGRAMS = ${TESTLIST} >> Make.inc
 | 
			
		||||
	echo >> Make.inc
 | 
			
		||||
	for f in $TESTS; do
 | 
			
		||||
		BNAME=`basename $f .cc`
 | 
			
		||||
		echo ${BNAME}_SOURCES=$f  >> Make.inc
 | 
			
		||||
		echo ${BNAME}_LDADD=-lGrid>> Make.inc
 | 
			
		||||
		echo >> Make.inc
 | 
			
		||||
	done
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
# benchmarks Make.inc
 | 
			
		||||
cd $home/benchmarks
 | 
			
		||||
 | 
			
		||||
echo> Make.inc
 | 
			
		||||
TESTS=`ls B*.cc`
 | 
			
		||||
TESTLIST=`echo ${TESTS} | sed s/.cc//g `
 | 
			
		||||
 | 
			
		||||
echo > Make.inc
 | 
			
		||||
echo bin_PROGRAMS = ${TESTLIST} >> Make.inc
 | 
			
		||||
echo >> Make.inc
 | 
			
		||||
 | 
			
		||||
for f in $TESTS
 | 
			
		||||
do
 | 
			
		||||
BNAME=`basename $f .cc`
 | 
			
		||||
echo >> Make.inc
 | 
			
		||||
echo ${BNAME}_SOURCES=$f  >> Make.inc
 | 
			
		||||
echo ${BNAME}_LDADD=-lGrid>> Make.inc
 | 
			
		||||
echo bin_PROGRAMS = ${TESTLIST} > Make.inc
 | 
			
		||||
echo >> Make.inc
 | 
			
		||||
for f in $TESTS; do
 | 
			
		||||
	BNAME=`basename $f .cc`
 | 
			
		||||
	echo ${BNAME}_SOURCES=$f  >> Make.inc
 | 
			
		||||
	echo ${BNAME}_LDADD=-lGrid>> Make.inc
 | 
			
		||||
	echo >> Make.inc
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
cd ..
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										19
									
								
								scripts/update_eigen.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										19
									
								
								scripts/update_eigen.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,19 @@
 | 
			
		||||
#!/usr/bin/env bash
#
# Replace the bundled Eigen sources in lib/Eigen with the ones found in the
# given release archive, and regenerate lib/Eigen.inc, which lists every
# imported file in the make variable 'eigen_files'.
#
# Must be run from the directory that contains lib/.
#
# usage: update_eigen.sh <archive>

# Stop at the first failing command instead of carrying on with a
# half-updated tree.
set -e

if (( $# != 1 )); then
    echo "usage: `basename $0` <archive>" 1>&2
    exit 1
fi
ARC=$1

INITDIR=`pwd`

# Top-level directory of the archive = first path component of its first entry.
ARCDIR=`tar -tf "${ARC}" | head -n1 | sed -e 's@/.*@@'`

# An unreadable archive yields an empty name; refuse it (and '.'/'..') so the
# cd and rm -rf below can never act on an unintended directory.
case "${ARCDIR}" in
    ''|.|..)
        echo "error: cannot determine the top-level directory of '${ARC}'" 1>&2
        exit 1
        ;;
esac

tar -xf "${ARC}"
if [ ! -d "${ARCDIR}/Eigen" ]; then
    echo "error: '${ARC}' does not contain ${ARCDIR}/Eigen" 1>&2
    rm -rf "${ARCDIR}"
    exit 1
fi

# Only drop the current copy once the replacement is known to exist.
rm -rf lib/Eigen

cd "${ARCDIR}"
# --exclude has to come before the member name: GNU tar treats it as a
# positional option that only affects the arguments following it.
tar -cf - --exclude='*.txt' Eigen | tar -xf - -C ../lib/
cd ../lib

# Emit a backslash-continued make variable: every file line is indented and
# gets a trailing ' \' except the last one.
echo 'eigen_files =\' > Eigen.inc
find Eigen -type f -print | sed 's/^/  /;$q;s/$/ \\/' >> Eigen.inc

cd "${INITDIR}"
rm -rf "${ARCDIR}"
 | 
			
		||||
							
								
								
									
										18
									
								
								scripts/update_fftw.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										18
									
								
								scripts/update_fftw.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,18 @@
 | 
			
		||||
#!/usr/bin/env bash
#
# Replace the bundled FFTW header lib/fftw/fftw3.h with the copy found under
# api/ in the given FFTW release archive.
#
# Must be run from the directory that contains lib/.
#
# usage: update_fftw.sh <archive>

# Stop at the first failing command instead of carrying on with a
# half-updated tree.
set -e

if (( $# != 1 )); then
    echo "usage: `basename $0` <archive>" 1>&2
    exit 1
fi
ARC=$1

INITDIR=`pwd`

# Top-level directory of the archive = first path component of its first entry.
ARCDIR=`tar -tf "${ARC}" | head -n1 | sed -e 's@/.*@@'`

# An unreadable archive yields an empty name; refuse it (and '.'/'..') so the
# rm -rf below can never act on an unintended directory.
case "${ARCDIR}" in
    ''|.|..)
        echo "error: cannot determine the top-level directory of '${ARC}'" 1>&2
        exit 1
        ;;
esac

tar -xf "${ARC}"
if [ ! -f "${ARCDIR}/api/fftw3.h" ]; then
    echo "error: '${ARC}' does not contain ${ARCDIR}/api/fftw3.h" 1>&2
    rm -rf "${ARCDIR}"
    exit 1
fi

# Only drop the current copy once the replacement header is known to exist.
rm -rf lib/fftw
mkdir lib/fftw
cp "${ARCDIR}/api/fftw3.h" lib/fftw/

cd "${INITDIR}"
rm -rf "${ARCDIR}"
 | 
			
		||||
@@ -1,11 +0,0 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS += Test_nersc_io Test_serialisation
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_nersc_io_SOURCES=Test_nersc_io.cc
 | 
			
		||||
Test_nersc_io_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_serialisation_SOURCES=Test_serialisation.cc
 | 
			
		||||
Test_serialisation_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
@@ -1,19 +1 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS =
 | 
			
		||||
SUBDIRS =
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
if USE_LAPACK
 | 
			
		||||
AM_CXXFLAGS += -DUSE_LAPACK
 | 
			
		||||
if USE_LAPACK_LIB
 | 
			
		||||
#if test "X${ac_LAPACK}X" != XyesX 
 | 
			
		||||
AM_CXXFLAGS += -I$(ac_LAPACK)/include
 | 
			
		||||
AM_LDFLAGS += -L$(ac_LAPACK)/lib
 | 
			
		||||
#fi
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +0,0 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS += Test_cshift Test_dwf_mixedcg_prec Test_simd Test_stencil
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cshift_SOURCES=Test_cshift.cc
 | 
			
		||||
Test_cshift_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_mixedcg_prec_SOURCES=Test_dwf_mixedcg_prec.cc
 | 
			
		||||
Test_dwf_mixedcg_prec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_simd_SOURCES=Test_simd.cc
 | 
			
		||||
Test_simd_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_stencil_SOURCES=Test_stencil.cc
 | 
			
		||||
Test_stencil_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
@@ -1,26 +1,7 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
 | 
			
		||||
#SUBDIRS = core
 | 
			
		||||
 | 
			
		||||
# Uncomment to enable complete test suite build
 | 
			
		||||
SUBDIRS = core forces hmc solver debug	
 | 
			
		||||
 | 
			
		||||
if BUILD_CHROMA_REGRESSION
 | 
			
		||||
  SUBDIRS+= qdpxx
 | 
			
		||||
endif
 | 
			
		||||
bin_PROGRAMS =
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
if USE_LAPACK
 | 
			
		||||
AM_CXXFLAGS += -DUSE_LAPACK
 | 
			
		||||
if USE_LAPACK_LIB
 | 
			
		||||
#if test "X${ac_LAPACK}X" != XyesX 
 | 
			
		||||
AM_CXXFLAGS += -I$(ac_LAPACK)/include
 | 
			
		||||
AM_LDFLAGS += -L$(ac_LAPACK)/lib
 | 
			
		||||
#fi
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
@@ -1,83 +1,69 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS += Test_cf_coarsen_support Test_checker Test_contfrac_even_odd Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_even_odd Test_dwf_rb5d Test_gamma Test_GaugeAction Test_gparity Test_gpwilson_even_odd Test_lie_generators Test_main Test_quenched_update Test_RectPlaq Test_rng Test_rng_fixed Test_wilson_even_odd Test_wilson_tm_even_odd
 | 
			
		||||
 | 
			
		||||
tests: Test_cf_coarsen_support Test_checker Test_contfrac_even_odd Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_even_odd Test_dwf_rb5d Test_fft Test_fftf Test_gamma Test_GaugeAction Test_gparity Test_gpwilson_even_odd Test_lie_generators Test_main Test_quenched_update Test_RectPlaq Test_rng Test_rng_fixed Test_wilson_even_odd Test_wilson_tm_even_odd
 | 
			
		||||
EXTRA_PROGRAMS = Test_cf_coarsen_support Test_checker Test_contfrac_even_odd Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_even_odd Test_dwf_rb5d Test_fft Test_fftf Test_gamma Test_GaugeAction Test_gparity Test_gpwilson_even_odd Test_lie_generators Test_main Test_quenched_update Test_RectPlaq Test_rng Test_rng_fixed Test_wilson_even_odd Test_wilson_tm_even_odd
 | 
			
		||||
 | 
			
		||||
Test_cf_coarsen_support_SOURCES=Test_cf_coarsen_support.cc
 | 
			
		||||
Test_cf_coarsen_support_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_checker_SOURCES=Test_checker.cc
 | 
			
		||||
Test_checker_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_contfrac_even_odd_SOURCES=Test_contfrac_even_odd.cc
 | 
			
		||||
Test_contfrac_even_odd_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cshift_red_black_SOURCES=Test_cshift_red_black.cc
 | 
			
		||||
Test_cshift_red_black_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cshift_red_black_rotate_SOURCES=Test_cshift_red_black_rotate.cc
 | 
			
		||||
Test_cshift_red_black_rotate_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cshift_rotate_SOURCES=Test_cshift_rotate.cc
 | 
			
		||||
Test_cshift_rotate_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_even_odd_SOURCES=Test_dwf_even_odd.cc
 | 
			
		||||
Test_dwf_even_odd_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_rb5d_SOURCES=Test_dwf_rb5d.cc
 | 
			
		||||
Test_dwf_rb5d_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
Test_fft_SOURCES=Test_fft.cc
 | 
			
		||||
Test_fft_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
Test_fftf_SOURCES=Test_fftf.cc
 | 
			
		||||
Test_fftf_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
Test_gamma_SOURCES=Test_gamma.cc
 | 
			
		||||
Test_gamma_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
 | 
			
		||||
Test_GaugeAction_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_gparity_SOURCES=Test_gparity.cc
 | 
			
		||||
Test_gparity_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
 | 
			
		||||
Test_gpwilson_even_odd_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_lie_generators_SOURCES=Test_lie_generators.cc
 | 
			
		||||
Test_lie_generators_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_main_SOURCES=Test_main.cc
 | 
			
		||||
Test_main_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_quenched_update_SOURCES=Test_quenched_update.cc
 | 
			
		||||
Test_quenched_update_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_RectPlaq_SOURCES=Test_RectPlaq.cc
 | 
			
		||||
Test_RectPlaq_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_rng_SOURCES=Test_rng.cc
 | 
			
		||||
Test_rng_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_rng_fixed_SOURCES=Test_rng_fixed.cc
 | 
			
		||||
Test_rng_fixed_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_even_odd_SOURCES=Test_wilson_even_odd.cc
 | 
			
		||||
Test_wilson_even_odd_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_tm_even_odd_SOURCES=Test_wilson_tm_even_odd.cc
 | 
			
		||||
Test_wilson_tm_even_odd_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS =
 | 
			
		||||
SUBDIRS =
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
if USE_LAPACK
 | 
			
		||||
AM_CXXFLAGS += -DUSE_LAPACK
 | 
			
		||||
if USE_LAPACK_LIB
 | 
			
		||||
#if test "X${ac_LAPACK}X" != XyesX 
 | 
			
		||||
AM_CXXFLAGS += -I$(ac_LAPACK)/include
 | 
			
		||||
AM_LDFLAGS += -L$(ac_LAPACK)/lib
 | 
			
		||||
#fi
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										111
									
								
								tests/core/Test_fft.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								tests/core/Test_fft.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,111 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./tests/core/Test_fft.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  std::vector<int> latt_size   = GridDefaultLatt();
 | 
			
		||||
  std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
 | 
			
		||||
  std::vector<int> mpi_layout  = GridDefaultMpi();
 | 
			
		||||
 | 
			
		||||
  int vol = 1;
 | 
			
		||||
  for(int d=0;d<latt_size.size();d++){
 | 
			
		||||
    vol = vol * latt_size[d];
 | 
			
		||||
  }
 | 
			
		||||
  GridCartesian        Fine(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
  LatticeComplexD     one(&Fine);
 | 
			
		||||
  LatticeComplexD      zz(&Fine);
 | 
			
		||||
  LatticeComplexD       C(&Fine);
 | 
			
		||||
  LatticeComplexD  Ctilde(&Fine);
 | 
			
		||||
  LatticeComplexD    coor(&Fine);
 | 
			
		||||
 | 
			
		||||
  LatticeSpinMatrixD    S(&Fine);
 | 
			
		||||
  LatticeSpinMatrixD    Stilde(&Fine);
 | 
			
		||||
  
 | 
			
		||||
  std::vector<int> p({1,2,3,2});
 | 
			
		||||
 | 
			
		||||
  one = ComplexD(1.0,0.0);
 | 
			
		||||
  zz  = ComplexD(0.0,0.0);
 | 
			
		||||
 | 
			
		||||
  ComplexD ci(0.0,1.0);
 | 
			
		||||
 | 
			
		||||
  C=zero;
 | 
			
		||||
  for(int mu=0;mu<4;mu++){
 | 
			
		||||
    RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
    LatticeCoordinate(coor,mu);
 | 
			
		||||
    C = C - (TwoPiL * p[mu]) * coor;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  C = exp(C*ci);
 | 
			
		||||
 | 
			
		||||
  S=zero;
 | 
			
		||||
  S = S+C;
 | 
			
		||||
 | 
			
		||||
  FFT theFFT(&Fine);
 | 
			
		||||
 | 
			
		||||
  theFFT.FFT_dim(Ctilde,C,0,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Ctilde,C,1,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Ctilde,C,2,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Ctilde,C,3,FFT::forward);  std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
 | 
			
		||||
  //  C=zero;
 | 
			
		||||
  //  Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
 | 
			
		||||
  TComplexD cVol;
 | 
			
		||||
  cVol()()() = vol;
 | 
			
		||||
 | 
			
		||||
  C=zero;
 | 
			
		||||
  pokeSite(cVol,C,p);
 | 
			
		||||
  C=C-Ctilde;
 | 
			
		||||
  std::cout << "diff scalar "<<norm2(C) << std::endl;
 | 
			
		||||
 | 
			
		||||
  theFFT.FFT_dim(Stilde,S,0,FFT::forward);  S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Stilde,S,1,FFT::forward);  S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Stilde,S,2,FFT::forward);  S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
 | 
			
		||||
  SpinMatrixD Sp; 
 | 
			
		||||
  Sp = zero; Sp = Sp+cVol;
 | 
			
		||||
 | 
			
		||||
  S=zero;
 | 
			
		||||
  pokeSite(Sp,S,p);
 | 
			
		||||
 | 
			
		||||
  S= S-Stilde;
 | 
			
		||||
  std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										111
									
								
								tests/core/Test_fftf.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								tests/core/Test_fftf.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,111 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./tests/core/Test_fftf.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  std::vector<int> latt_size   = GridDefaultLatt();
 | 
			
		||||
  std::vector<int> simd_layout( { vComplexF::Nsimd(),1,1,1});
 | 
			
		||||
  std::vector<int> mpi_layout  = GridDefaultMpi();
 | 
			
		||||
 | 
			
		||||
  int vol = 1;
 | 
			
		||||
  for(int d=0;d<latt_size.size();d++){
 | 
			
		||||
    vol = vol * latt_size[d];
 | 
			
		||||
  }
 | 
			
		||||
  GridCartesian        Fine(latt_size,simd_layout,mpi_layout);
 | 
			
		||||
 | 
			
		||||
  LatticeComplexF     one(&Fine);
 | 
			
		||||
  LatticeComplexF      zz(&Fine);
 | 
			
		||||
  LatticeComplexF       C(&Fine);
 | 
			
		||||
  LatticeComplexF  Ctilde(&Fine);
 | 
			
		||||
  LatticeComplexF    coor(&Fine);
 | 
			
		||||
 | 
			
		||||
  LatticeSpinMatrixF    S(&Fine);
 | 
			
		||||
  LatticeSpinMatrixF    Stilde(&Fine);
 | 
			
		||||
  
 | 
			
		||||
  std::vector<int> p({1,2,3,2});
 | 
			
		||||
 | 
			
		||||
  one = ComplexF(1.0,0.0);
 | 
			
		||||
  zz  = ComplexF(0.0,0.0);
 | 
			
		||||
 | 
			
		||||
  ComplexF ci(0.0,1.0);
 | 
			
		||||
 | 
			
		||||
  C=zero;
 | 
			
		||||
  for(int mu=0;mu<4;mu++){
 | 
			
		||||
    RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
    LatticeCoordinate(coor,mu);
 | 
			
		||||
    C = C - (TwoPiL * p[mu]) * coor;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  C = exp(C*ci);
 | 
			
		||||
 | 
			
		||||
  S=zero;
 | 
			
		||||
  S = S+C;
 | 
			
		||||
 | 
			
		||||
  FFT theFFT(&Fine);
 | 
			
		||||
 | 
			
		||||
  theFFT.FFT_dim(Ctilde,C,0,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Ctilde,C,1,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Ctilde,C,2,FFT::forward);  C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Ctilde,C,3,FFT::forward);  std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
 | 
			
		||||
  //  C=zero;
 | 
			
		||||
  //  Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
 | 
			
		||||
  TComplexF cVol;
 | 
			
		||||
  cVol()()() = vol;
 | 
			
		||||
 | 
			
		||||
  C=zero;
 | 
			
		||||
  pokeSite(cVol,C,p);
 | 
			
		||||
  C=C-Ctilde;
 | 
			
		||||
  std::cout << "diff scalar "<<norm2(C) << std::endl;
 | 
			
		||||
 | 
			
		||||
  theFFT.FFT_dim(Stilde,S,0,FFT::forward);  S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Stilde,S,1,FFT::forward);  S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Stilde,S,2,FFT::forward);  S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
  theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
 | 
			
		||||
 | 
			
		||||
  SpinMatrixF Sp; 
 | 
			
		||||
  Sp = zero; Sp = Sp+cVol;
 | 
			
		||||
 | 
			
		||||
  S=zero;
 | 
			
		||||
  pokeSite(Sp,S,p);
 | 
			
		||||
 | 
			
		||||
  S= S-Stilde;
 | 
			
		||||
  std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
@@ -1,35 +0,0 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS += Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_even_odd_vec Test_cayley_ldop_cr Test_cheby Test_synthetic_lanczos 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cayley_cg_SOURCES=Test_cayley_cg.cc
 | 
			
		||||
Test_cayley_cg_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cayley_coarsen_support_SOURCES=Test_cayley_coarsen_support.cc
 | 
			
		||||
Test_cayley_coarsen_support_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cayley_even_odd_SOURCES=Test_cayley_even_odd.cc
 | 
			
		||||
Test_cayley_even_odd_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cayley_even_odd_vec_SOURCES=Test_cayley_even_odd_vec.cc
 | 
			
		||||
Test_cayley_even_odd_vec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cayley_ldop_cr_SOURCES=Test_cayley_ldop_cr.cc
 | 
			
		||||
Test_cayley_ldop_cr_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cheby_SOURCES=Test_cheby.cc
 | 
			
		||||
Test_cheby_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_synthetic_lanczos_SOURCES=Test_synthetic_lanczos.cc
 | 
			
		||||
Test_synthetic_lanczos_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_zmm_SOURCES=Test_zmm.cc
 | 
			
		||||
Test_zmm_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
@@ -1,19 +1 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS =
 | 
			
		||||
SUBDIRS =
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
if USE_LAPACK
 | 
			
		||||
AM_CXXFLAGS += -DUSE_LAPACK
 | 
			
		||||
if USE_LAPACK_LIB
 | 
			
		||||
#if test "X${ac_LAPACK}X" != XyesX 
 | 
			
		||||
AM_CXXFLAGS += -I$(ac_LAPACK)/include
 | 
			
		||||
AM_LDFLAGS += -L$(ac_LAPACK)/lib
 | 
			
		||||
#fi
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
@@ -44,6 +44,7 @@ struct scal {
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
typedef DomainWallFermion<DomainWallVec5dImplR>                      DomainWallVecFermionR;
 | 
			
		||||
typedef ZMobiusFermion<ZDomainWallVec5dImplR>                        ZMobiusVecFermionR;
 | 
			
		||||
typedef MobiusFermion<DomainWallVec5dImplR>                          MobiusVecFermionR;
 | 
			
		||||
typedef MobiusZolotarevFermion<DomainWallVec5dImplR>                 MobiusZolotarevVecFermionR;
 | 
			
		||||
typedef ScaledShamirFermion<DomainWallVec5dImplR>                    ScaledShamirVecFermionR;
 | 
			
		||||
@@ -117,6 +118,17 @@ int main (int argc, char ** argv)
 | 
			
		||||
  TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
  TestWhat<MobiusVecFermionR>(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage <<"Z-MobiusFermion test"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
 | 
			
		||||
  std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
 | 
			
		||||
  ZMobiusFermionR     zDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c);
 | 
			
		||||
  ZMobiusVecFermionR szDmob(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
 | 
			
		||||
  TestMoo(zDmob,szDmob);
 | 
			
		||||
  TestWhat<ZMobiusFermionR>(zDmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
  TestWhat<ZMobiusVecFermionR>(szDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
 | 
			
		||||
 
 | 
			
		||||
@@ -26,9 +26,15 @@ See the full license in the file "LICENSE" in the top level distribution directo
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
#include <PerfCount.h>
 | 
			
		||||
#include <Grid/PerfCount.h>
 | 
			
		||||
 | 
			
		||||
#ifdef TEST_ZMM
 | 
			
		||||
 | 
			
		||||
int main(int argc,char **argv)
 | 
			
		||||
{
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char **argv) { return 0; }
 | 
			
		||||
#if 0
 | 
			
		||||
#include <simd/Intel512wilson.h>
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
@@ -478,5 +484,12 @@ void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3)
 | 
			
		||||
 | 
			
		||||
  return;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
#else
 | 
			
		||||
int main(int argc, char **argv)
 | 
			
		||||
{
 | 
			
		||||
  std::cerr << "error: no ZMM test for the selected architecture" << std::endl;
 | 
			
		||||
 | 
			
		||||
  return 1;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,47 +1,36 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS += Test_contfrac_force Test_dwf_force Test_dwf_gpforce Test_gpdwf_force Test_gp_rect_force Test_gpwilson_force Test_partfrac_force Test_rect_force Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
 | 
			
		||||
 | 
			
		||||
tests: Test_contfrac_force Test_dwf_force Test_dwf_gpforce Test_gpdwf_force Test_gp_rect_force Test_gpwilson_force Test_partfrac_force Test_rect_force Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
 | 
			
		||||
EXTRA_PROGRAMS = Test_contfrac_force Test_dwf_force Test_dwf_gpforce Test_gpdwf_force Test_gp_rect_force Test_gpwilson_force Test_partfrac_force Test_rect_force Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
 | 
			
		||||
 | 
			
		||||
Test_contfrac_force_SOURCES=Test_contfrac_force.cc
 | 
			
		||||
Test_contfrac_force_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_force_SOURCES=Test_dwf_force.cc
 | 
			
		||||
Test_dwf_force_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_gpforce_SOURCES=Test_dwf_gpforce.cc
 | 
			
		||||
Test_dwf_gpforce_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_gpdwf_force_SOURCES=Test_gpdwf_force.cc
 | 
			
		||||
Test_gpdwf_force_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_gp_rect_force_SOURCES=Test_gp_rect_force.cc
 | 
			
		||||
Test_gp_rect_force_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_gpwilson_force_SOURCES=Test_gpwilson_force.cc
 | 
			
		||||
Test_gpwilson_force_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_partfrac_force_SOURCES=Test_partfrac_force.cc
 | 
			
		||||
Test_partfrac_force_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_rect_force_SOURCES=Test_rect_force.cc
 | 
			
		||||
Test_rect_force_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_force_SOURCES=Test_wilson_force.cc
 | 
			
		||||
Test_wilson_force_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_force_phiMdagMphi_SOURCES=Test_wilson_force_phiMdagMphi.cc
 | 
			
		||||
Test_wilson_force_phiMdagMphi_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_force_phiMphi_SOURCES=Test_wilson_force_phiMphi.cc
 | 
			
		||||
Test_wilson_force_phiMphi_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS =
 | 
			
		||||
SUBDIRS =
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
if USE_LAPACK
 | 
			
		||||
AM_CXXFLAGS += -DUSE_LAPACK
 | 
			
		||||
if USE_LAPACK_LIB
 | 
			
		||||
#if test "X${ac_LAPACK}X" != XyesX 
 | 
			
		||||
AM_CXXFLAGS += -I$(ac_LAPACK)/include
 | 
			
		||||
AM_LDFLAGS += -L$(ac_LAPACK)/lib
 | 
			
		||||
#fi
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
@@ -1,75 +1,57 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS += Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonAdjointFermionGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_multishift_sqrt Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio
 | 
			
		||||
 | 
			
		||||
tests: Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonAdjointFermionGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_multishift_sqrt Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio
 | 
			
		||||
EXTRA_PROGRAMS = Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonAdjointFermionGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_multishift_sqrt Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio
 | 
			
		||||
 | 
			
		||||
Test_hmc_EODWFRatio_SOURCES=Test_hmc_EODWFRatio.cc
 | 
			
		||||
Test_hmc_EODWFRatio_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_EODWFRatio_Gparity_SOURCES=Test_hmc_EODWFRatio_Gparity.cc
 | 
			
		||||
Test_hmc_EODWFRatio_Gparity_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_EOWilsonFermionGauge_SOURCES=Test_hmc_EOWilsonFermionGauge.cc
 | 
			
		||||
Test_hmc_EOWilsonFermionGauge_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_EOWilsonRatio_SOURCES=Test_hmc_EOWilsonRatio.cc
 | 
			
		||||
Test_hmc_EOWilsonRatio_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_GparityIwasakiGauge_SOURCES=Test_hmc_GparityIwasakiGauge.cc
 | 
			
		||||
Test_hmc_GparityIwasakiGauge_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_GparityWilsonGauge_SOURCES=Test_hmc_GparityWilsonGauge.cc
 | 
			
		||||
Test_hmc_GparityWilsonGauge_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_IwasakiGauge_SOURCES=Test_hmc_IwasakiGauge.cc
 | 
			
		||||
Test_hmc_IwasakiGauge_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_RectGauge_SOURCES=Test_hmc_RectGauge.cc
 | 
			
		||||
Test_hmc_RectGauge_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_WilsonAdjointFermionGauge_SOURCES=Test_hmc_WilsonAdjointFermionGauge.cc
 | 
			
		||||
Test_hmc_WilsonAdjointFermionGauge_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_WilsonFermionGauge_SOURCES=Test_hmc_WilsonFermionGauge.cc
 | 
			
		||||
Test_hmc_WilsonFermionGauge_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_WilsonGauge_SOURCES=Test_hmc_WilsonGauge.cc
 | 
			
		||||
Test_hmc_WilsonGauge_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_hmc_WilsonRatio_SOURCES=Test_hmc_WilsonRatio.cc
 | 
			
		||||
Test_hmc_WilsonRatio_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_multishift_sqrt_SOURCES=Test_multishift_sqrt.cc
 | 
			
		||||
Test_multishift_sqrt_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_remez_SOURCES=Test_remez.cc
 | 
			
		||||
Test_remez_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_rhmc_EOWilson1p1_SOURCES=Test_rhmc_EOWilson1p1.cc
 | 
			
		||||
Test_rhmc_EOWilson1p1_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_rhmc_EOWilsonRatio_SOURCES=Test_rhmc_EOWilsonRatio.cc
 | 
			
		||||
Test_rhmc_EOWilsonRatio_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_rhmc_Wilson1p1_SOURCES=Test_rhmc_Wilson1p1.cc
 | 
			
		||||
Test_rhmc_Wilson1p1_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_rhmc_WilsonRatio_SOURCES=Test_rhmc_WilsonRatio.cc
 | 
			
		||||
Test_rhmc_WilsonRatio_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS =
 | 
			
		||||
SUBDIRS =
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
if USE_LAPACK
 | 
			
		||||
AM_CXXFLAGS += -DUSE_LAPACK
 | 
			
		||||
if USE_LAPACK_LIB
 | 
			
		||||
#if test "X${ac_LAPACK}X" != XyesX 
 | 
			
		||||
AM_CXXFLAGS += -I$(ac_LAPACK)/include
 | 
			
		||||
AM_LDFLAGS += -L$(ac_LAPACK)/lib
 | 
			
		||||
#fi
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
@@ -1,11 +0,0 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS += Test_qdpxx_loops_staples Test_qdpxx_munprec
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_qdpxx_loops_staples_SOURCES=Test_qdpxx_loops_staples.cc
 | 
			
		||||
Test_qdpxx_loops_staples_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_qdpxx_munprec_SOURCES=Test_qdpxx_munprec.cc
 | 
			
		||||
Test_qdpxx_munprec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
@@ -1,6 +1,4 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
AM_CXXFLAGS += `chroma-config --cxxflags`
 | 
			
		||||
AM_LDFLAGS  += `chroma-config --ldflags` `chroma-config --libs`
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include `chroma-config --cxxflags`
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib `chroma-config --ldflags` `chroma-config --libs`
 | 
			
		||||
bin_PROGRAMS=
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
@@ -1,55 +0,0 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS += Test_cf_cr_unprec Test_contfrac_cg Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_fpgcr Test_dwf_hdcr Test_dwf_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_cf_cr_unprec_SOURCES=Test_cf_cr_unprec.cc
 | 
			
		||||
Test_cf_cr_unprec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_contfrac_cg_SOURCES=Test_contfrac_cg.cc
 | 
			
		||||
Test_contfrac_cg_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_cg_prec_SOURCES=Test_dwf_cg_prec.cc
 | 
			
		||||
Test_dwf_cg_prec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_cg_schur_SOURCES=Test_dwf_cg_schur.cc
 | 
			
		||||
Test_dwf_cg_schur_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_cg_unprec_SOURCES=Test_dwf_cg_unprec.cc
 | 
			
		||||
Test_dwf_cg_unprec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_cr_unprec_SOURCES=Test_dwf_cr_unprec.cc
 | 
			
		||||
Test_dwf_cr_unprec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_fpgcr_SOURCES=Test_dwf_fpgcr.cc
 | 
			
		||||
Test_dwf_fpgcr_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_hdcr_SOURCES=Test_dwf_hdcr.cc
 | 
			
		||||
Test_dwf_hdcr_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
 | 
			
		||||
Test_dwf_lanczos_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc
 | 
			
		||||
Test_wilson_cg_prec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_cg_schur_SOURCES=Test_wilson_cg_schur.cc
 | 
			
		||||
Test_wilson_cg_schur_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_cg_unprec_SOURCES=Test_wilson_cg_unprec.cc
 | 
			
		||||
Test_wilson_cg_unprec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_wilson_cr_unprec_SOURCES=Test_wilson_cr_unprec.cc
 | 
			
		||||
Test_wilson_cr_unprec_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
@@ -1,19 +1 @@
 | 
			
		||||
# additional include paths necessary to compile the C++ library
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS =
 | 
			
		||||
SUBDIRS =
 | 
			
		||||
 | 
			
		||||
AM_CXXFLAGS = -I$(top_srcdir)/include
 | 
			
		||||
AM_LDFLAGS = -L$(top_builddir)/lib
 | 
			
		||||
 | 
			
		||||
if USE_LAPACK
 | 
			
		||||
AM_CXXFLAGS += -DUSE_LAPACK
 | 
			
		||||
if USE_LAPACK_LIB
 | 
			
		||||
#if test "X${ac_LAPACK}X" != XyesX 
 | 
			
		||||
AM_CXXFLAGS += -I$(ac_LAPACK)/include
 | 
			
		||||
AM_LDFLAGS += -L$(ac_LAPACK)/lib
 | 
			
		||||
#fi
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
include Make.inc
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user