1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Merge branch 'develop' into feature/hirep

This commit is contained in:
Guido Cossu 2016-09-01 12:59:53 +01:00
commit 0fd179fb33
75 changed files with 16078 additions and 2795 deletions

25
.gitignore vendored
View File

@ -5,7 +5,6 @@
*.o
*.obj
# Editor files #
################
*~
@ -48,6 +47,7 @@ Config.h.in
config.log
config.status
.deps
*.inc
# http://www.gnu.org/software/autoconf #
########################################
@ -63,19 +63,7 @@ config.sub
config.guess
INSTALL
.dirstamp
# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
ltmain.sh
# Logs and databases #
######################
@ -101,3 +89,12 @@ build*/*
#####################
*.xcodeproj/*
build.sh
# Eigen source #
################
lib/Eigen/*
# libtool macros #
##################
m4/lt*
m4/libtool.m4

View File

@ -23,6 +23,8 @@ matrix:
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: VERSION=-4.9
- compiler: gcc
@ -35,6 +37,8 @@ matrix:
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: VERSION=-5
- compiler: clang
@ -47,6 +51,8 @@ matrix:
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- compiler: clang
@ -59,6 +65,8 @@ matrix:
- libmpfr-dev
- libgmp-dev
- libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
@ -69,6 +77,7 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
install:
@ -82,14 +91,20 @@ install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
script:
- ./autogen.sh
- ./bootstrap.sh
- mkdir build
- cd build
- ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- make -j1 -C prerequisites
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1
- echo make clean
- ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1
- ./benchmarks/Benchmark_dwf --threads 1
- echo make clean
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
- make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

View File

@ -1,6 +1,5 @@
# additional include paths necessary to compile the C++ library
AM_CXXFLAGS = -I$(top_srcdir)/include/
SUBDIRS = lib benchmarks tests
SUBDIRS = prerequisites lib benchmarks tests
filelist: $(SUBDIRS)
AM_CXXFLAGS += -I$(top_builddir)/include
ACLOCAL_AMFLAGS = -I m4

110
README.md
View File

@ -1,8 +1,28 @@
# Grid [![Build Status](https://travis-ci.org/paboyle/Grid.svg?branch=master)](https://travis-ci.org/paboyle/Grid)
Data parallel C++ mathematical object library
# Grid
<table>
<tr>
<td>Last stable release</td>
<td><a href="https://travis-ci.org/paboyle/Grid">
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
</td>
</tr>
<tr>
<td>Development branch</td>
<td><a href="https://travis-ci.org/paboyle/Grid">
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
</td>
</tr>
</table>
Last update 2015/7/30
**Data parallel C++ mathematical object library.**
Please send all pull requests to the `develop` branch.
License: GPL v2.
Last update 2016/08/03.
### Description
This library provides data parallel C++ container classes with internal memory layout
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
@ -22,37 +42,75 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
for most programmers.
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON on the way).
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON and BG/Q QPX on the way).
These are presented as
vRealF, vRealD, vComplexF, vComplexD
internal vector data types. These may be useful in themselves for other programmers.
The corresponding scalar types are named
RealF, RealD, ComplexF, ComplexD
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
MPI, OpenMP, and SIMD parallelism are present in the library.
Please see https://arxiv.org/abs/1512.03487 for more detail.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
are examples:
### Installation
First, start by cloning the repository:
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
``` bash
git clone https://github.com/paboyle/Grid.git
```
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX
Then enter the cloned directory and set up the build system:
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
``` bash
cd Grid
./bootstrap.sh
```
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
Note: Before running configure it could be necessary to execute the script
script/filelist
Now you can execute the `configure` script to generate makefiles (here from a build directory):
``` bash
mkdir build; cd build
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
```
For developers:
Use reconfigure_script in the scripts/ directory to create the autotools environment
where `--enable-precision=` set the default precision (`single` or `double`),
`--enable-simd=` set the SIMD type (see possible values below), `--enable-
comms=` set the protocol used for communications (`none`, `mpi`, `mpi-auto` or
`shmem`), and `<path>` should be replaced by the prefix path where you want to
install Grid. The `mpi-auto` communication option set `configure` to determine
automatically how to link to MPI. Other options are available, use `configure
--help` to display them. Like with any other program using GNU autotool, the
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
customise the build.
Finally, you can build and install Grid:
``` bash
make; make install
```
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
``` bash
make -C tests/<subdir> tests
```
### Possible SIMD types
The following options can be use with the `--enable-simd=` option to target different SIMD instruction sets:
| String | Description |
| ----------- | -------------------------------------- |
| `GEN` | generic portable vector code |
| `SSE4` | SSE 4.2 (128 bit) |
| `AVX` | AVX (256 bit) |
| `AVXFMA4` | AVX (256 bit) + FMA |
| `AVX2` | AVX 2 (256 bit) |
| `AVX512` | AVX 512 bit |
| `AVX512MIC` | AVX 512 bit for Intel MIC architecture |
| `ICMI` | Intel ICMI instructions (512 bit) |
Alternatively, some CPU codenames can be directly used:
| String | Description |
| ----------- | -------------------------------------- |
| `KNC` | [Intel Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
| `KNL` | [Intel Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |

View File

@ -1,4 +0,0 @@
aclocal -I m4
autoheader -f
automake -f --add-missing
autoconf -f

View File

@ -194,7 +194,7 @@ int main (int argc, char ** argv)
}
}
#if 0
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
@ -315,7 +315,7 @@ int main (int argc, char ** argv)
}
}
#endif
Grid_finalize();
}

View File

@ -61,6 +61,8 @@ int main (int argc, char ** argv)
QCD::WilsonKernelsStatic::AsmOpt=0;
}
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;

View File

@ -0,0 +1,117 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_wilson.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Richard Rollins <rprollins@users.noreply.github.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
bool overlapComms = false;
void bench_wilson (
LatticeFermion & src,
LatticeFermion & result,
WilsonFermionR & Dw,
double const volume,
int const dag );
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
typename WilsonFermionR::ImplParams params;
params.overlapCommsCompute = overlapComms;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> seeds({1,2,3,4});
RealD mass = 0.1;
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
int Lmax = 32;
int dmin = 0;
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
for (int L=8; L<=Lmax; L*=2)
{
std::vector<int> latt_size = std::vector<int>(4,L);
for(int d=4; d>dmin; d--)
{
if ( d<=3 ) { latt_size[d] *= 2; }
std::cout << GridLogMessage;
std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) );
std::cout << latt_size.back() << "\t\t";
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
bench_wilson(src,result,Dw,volume,DaggerNo);
bench_wilson(src,result,Dw,volume,DaggerYes);
std::cout << std::endl;
}
}
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
Grid_finalize();
}
void bench_wilson (
LatticeFermion & src,
LatticeFermion & result,
WilsonFermionR & Dw,
double const volume,
int const dag )
{
int ncall = 1000;
double t0 = usecond();
for(int i=0; i<ncall; i++) { Dw.Dhop(src,result,dag); }
double t1 = usecond();
double flops = 1344 * volume * ncall;
std::cout << flops/(t1-t0) << "\t\t";
}

View File

@ -40,14 +40,20 @@ int main(int argc,char **argv)
std::ofstream os("zmm.dat");
os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking ZMM"<<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
std::cout<<GridLogMessage << "Volume \t\t\t\tC++DW/MFLOPs\tASM-DW/MFLOPs\tdiff"<<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
for(int L=4;L<=32;L+=4){
for(int m=1;m<=2;m++){
for(int Ls=8;Ls<=16;Ls+=8){
std::vector<int> grid({L,L,m*L,m*L});
std::cout << GridLogMessage <<"\t";
for(int i=0;i<4;i++) {
std::cout << grid[i]<<"x";
}
std::cout << Ls<<std::endl;
std::cout << Ls<<"\t\t";
bench(os,grid,Ls);
}
}
@ -104,7 +110,6 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
RealD M5 =1.8;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall=50;
double t0=usecond();
for(int i=0;i<ncall;i++){
@ -116,7 +121,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
double flops=1344*volume/2;
mfc = flops*ncall/(t1-t0);
std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s = "<< mfc<<std::endl;
std::cout<<mfc<<"\t\t";
QCD::WilsonKernelsStatic::AsmOpt=1;
t0=usecond();
@ -125,7 +130,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
}
t1=usecond();
mfa = flops*ncall/(t1-t0);
std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s = "<< mfa<<std::endl;
std::cout<<mfa<<"\t\t";
/*
int dag=DaggerNo;
t0=usecond();
@ -163,8 +168,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
//resulta = (-0.5) * resulta;
diff = resulto-resulta;
std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
std::cout<<std::endl;
std::cout<<norm2(diff)<<std::endl;
return 0;
}

View File

@ -1,39 +0,0 @@
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_dwf_ntpf Benchmark_dwf_sweep Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson Benchmark_zmm
Benchmark_comms_SOURCES=Benchmark_comms.cc
Benchmark_comms_LDADD=-lGrid
Benchmark_dwf_SOURCES=Benchmark_dwf.cc
Benchmark_dwf_LDADD=-lGrid
Benchmark_dwf_ntpf_SOURCES=Benchmark_dwf_ntpf.cc
Benchmark_dwf_ntpf_LDADD=-lGrid
Benchmark_dwf_sweep_SOURCES=Benchmark_dwf_sweep.cc
Benchmark_dwf_sweep_LDADD=-lGrid
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
Benchmark_memory_asynch_LDADD=-lGrid
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
Benchmark_memory_bandwidth_LDADD=-lGrid
Benchmark_su3_SOURCES=Benchmark_su3.cc
Benchmark_su3_LDADD=-lGrid
Benchmark_wilson_SOURCES=Benchmark_wilson.cc
Benchmark_wilson_LDADD=-lGrid
Benchmark_zmm_SOURCES=Benchmark_zmm.cc
Benchmark_zmm_LDADD=-lGrid

View File

@ -1,8 +1 @@
# additional include paths necessary to compile the C++ library
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
#
# Test code
#
include Make.inc

19
bootstrap.sh Executable file
View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz
echo "-- deploying Eigen source..."
wget ${EIGEN_URL}
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
rm `basename ${EIGEN_URL}`
echo "-- copying fftw prototypes..."
wget ${FFTW_URL}
./scripts/update_fftw.sh `basename ${FFTW_URL}`
rm `basename ${FFTW_URL}`
echo '-- generating Make.inc files...'
./scripts/filelist
echo '-- generating configure script...'
autoreconf -fvi

15329
configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,277 +1,293 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
#
# Project Grid package
#
# Time-stamp: <2015-07-10 17:46:21 neo>
AC_PREREQ([2.63])
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
AC_CANONICAL_SYSTEM
AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid])
AM_INIT_AUTOMAKE(subdir-objects)
AC_CONFIG_MACRO_DIR([m4])
AC_LINK_FILES(lib,include/Grid )
AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_MSG_NOTICE([
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
Configuring $PACKAGE v$VERSION for $host
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
])
# Checks for programs.
############### Checks for programs
AC_LANG(C++)
CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX
############ openmp ###############
AC_OPENMP
AC_PROG_RANLIB
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
AX_EXT
# Checks for libraries.
#AX_GCC_VAR_ATTRIBUTE(aligned)
ac_openmp=no
# Checks for header files.
if test "${OPENMP_CXXFLAGS}X" != "X"; then
ac_openmp=yes
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
fi
############ libtool ###############
LT_INIT
############### Checks for header files
AC_CHECK_HEADERS(stdint.h)
AC_CHECK_HEADERS(mm_malloc.h)
AC_CHECK_HEADERS(malloc/malloc.h)
AC_CHECK_HEADERS(malloc.h)
AC_CHECK_HEADERS(endian.h)
AC_CHECK_HEADERS(execinfo.h)
AC_CHECK_HEADERS(gmp.h)
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
# Checks for typedefs, structures, and compiler characteristics.
############### Checks for typedefs, structures, and compiler characteristics
AC_TYPE_SIZE_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
# Checks for library functions.
echo
echo Checking libraries
echo :::::::::::::::::::::::::::::::::::::::::::
############### GMP and MPFR #################
AC_ARG_WITH([gmp],
[AS_HELP_STRING([--with-gmp=prefix],
[try this for a non-standard install prefix of the GMP library])],
[AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_gmp/lib" $AM_LDFLAGS])
AC_ARG_WITH([mpfr],
[AS_HELP_STRING([--with-mpfr=prefix],
[try this for a non-standard install prefix of the MPFR library])],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
################## lapack ####################
AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
case ${ac_LAPACK} in
no)
;;
yes)
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
*)
AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
esac
################## FFTW3 ####################
AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
#
# What about the MKL library replacement for fftw3 ? How do we know if fftw_execute
# can be found in MKL?
#
AC_CHECK_LIB([fftw3],[fftw_execute],
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])] [ac_fftw=yes],
[ac_fftw=no])
case ${ac_fftw} in
no)
echo WARNING libfftw3 not found FFT routines will not work
;;
yes)
AM_LDFLAGS="$AM_LDFLAGS -lfftw3 -lfftw3f"
esac
################ Get compiler informations
AC_LANG([C++])
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
AX_COMPILER_VENDOR
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
[vendor of C++ compiler that will compile the code])
AX_GXX_VERSION
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
[version of g++ that will compile the code])
############### Checks for library functions
CXXFLAGS_CPY=$CXXFLAGS
LDFLAGS_CPY=$LDFLAGS
LIBS_CPY=$LIBS
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
AC_CHECK_FUNCS([gettimeofday])
AC_CHECK_LIB([gmp],[__gmpf_init],
[AC_CHECK_LIB([mpfr],[mpfr_init],
[AC_DEFINE([HAVE_LIBMPFR], [1], [Define to 1 if you have the `MPFR' library (-lmpfr).])]
[have_mpfr=true]
[LIBS="$LIBS -lmpfr"],
[AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
[have_gmp=true]
[LIBS="$LIBS -lgmp"],
[AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
#AC_CHECK_LIB([gmp],[__gmpf_init],,
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
#Please install or provide the correct path to your installation
#Info at: http://www.gmplib.org)])
if test "${ac_LAPACK}x" != "nox"; then
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
[AC_MSG_ERROR("LAPACK enabled but library not found")])
fi
CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY
#AC_CHECK_LIB([mpfr],[mpfr_init],,
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
#Please install or provide the correct path to your installation
#Info at: http://www.mpfr.org/)])
#
# SIMD instructions selection
#
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
############### SIMD instruction selection
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
[ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG])
[ac_SIMD=${enable_simd}],[ac_SIMD=GEN])
supported=no
ac_ZMM=no;
case ${ax_cv_cxx_compiler_vendor} in
clang|gnu)
case ${ac_SIMD} in
SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
SIMD_FLAGS='-msse4.2';;
AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics])
SIMD_FLAGS='-mavx';;
AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma4';;
AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-mavx2 -mfma';;
AVX512|AVX512MIC|KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
IMCI|KNC)
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
SIMD_FLAGS='';;
GEN)
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
SIMD_FLAGS='';;
*)
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
esac;;
intel)
case ${ac_SIMD} in
SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
SIMD_FLAGS='-msse4.2 -xsse4.2';;
AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics])
SIMD_FLAGS='-mavx -xavx';;
AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -xavx -mfma';;
AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-xcore-avx512';;
AVX512MIC|KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
SIMD_FLAGS='-xmic-avx512';;
IMCI|KNC)
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
SIMD_FLAGS='';;
GEN)
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
SIMD_FLAGS='';;
*)
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);;
esac;;
*)
AC_MSG_WARN([Compiler unknown, using generic vector code])
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);;
esac
AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
case ${ac_SIMD} in
SSE4)
echo Configuring for SSE4
AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] )
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
supported=yes
else
AC_MSG_WARN([Your processor does not support SSE4 instructions])
fi
;;
AVX)
echo Configuring for AVX
AC_DEFINE([AVX1],[1],[AVX Intrinsics] )
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX instructions])
fi
;;
AVXFMA4)
echo Configuring for AVX
AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX instructions])
fi
;;
AVX2)
echo Configuring for AVX2
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX2 instructions])
fi
;;
AVX512)
echo Configuring for AVX512
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
supported="cross compilation"
ac_ZMM=yes;
;;
IMCI)
echo Configuring for IMCI
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
supported="cross compilation"
ac_ZMM=no;
;;
NEONv8)
echo Configuring for experimental ARMv8a support
AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
supported="cross compilation"
;;
DEBUG)
echo Configuring without SIMD support - only for compiler DEBUGGING!
AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] )
;;
*)
AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]);
;;
AVX512|AVX512MIC|KNL)
AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);;
*)
;;
esac
case ${ac_ZMM} in
yes)
echo Enabling ZMM source code
;;
no)
echo Disabling ZMM source code
;;
esac
AM_CONDITIONAL(BUILD_ZMM,[ test "X${ac_ZMM}X" == "XyesX" ])
############### precision selection
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
case ${ac_PRECISION} in
single)
echo default precision is single
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
;;
double)
echo default precision is double
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
;;
esac
#
# Comms selection
#
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
############### communication type selection
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|shmem],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
case ${ac_COMMS} in
none)
echo Configuring for NO communications
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
;;
mpi-auto)
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
LX_FIND_MPI
if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS"
;;
mpi)
echo Configuring for MPI communications
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
;;
shmem)
echo Configuring for SHMEM communications
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
;;
*)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;;
esac
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ])
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
#
# RNG selection
#
############### RNG selection
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
[Select Random Number Generator to be used])],\
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
case ${ac_RNG} in
ranlux48)
AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
;;
mt19937)
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
;;
*)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;;
esac
#
# SDE timing mode
#
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers=yes|no],\
############### timer option
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\
[Enable system dependent high res timers])],\
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
case ${ac_TIMERS} in
yes)
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
;;
no)
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;;
*)
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;;
esac
#
# Chroma regression tests
#
############### Chroma regression test
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
case ${ac_CHROMA} in
yes)
echo Enabling tests regressing to Chroma
;;
no)
echo Disabling tests regressing to Chroma
yes|no)
;;
*)
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;;
esac
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
#
# Lapack
#
AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Enable lapack yes/no ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
case ${ac_LAPACK} in
yes)
echo Enabling lapack
;;
no)
echo Disabling lapack
;;
*)
echo Enabling lapack at ${ac_LAPACK}
;;
esac
AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ])
AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ])
###################################################################
# Checks for doxygen support
# if present enables the "make doxyfile" command
echo
echo Checking doxygen support
echo :::::::::::::::::::::::::::::::::::::::::::
############### Doxygen
AC_PROG_DOXYGEN
if test -n "$DOXYGEN"
@ -279,9 +295,14 @@ then
AC_CONFIG_FILES([docs/doxy.cfg])
fi
echo
echo Creating configuration files
echo :::::::::::::::::::::::::::::::::::::::::::
############### Ouput
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_CXXFLAGS])
AC_SUBST([AM_LDFLAGS])
AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile)
@ -293,30 +314,34 @@ AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile)
AC_CONFIG_FILES(prerequisites/Makefile)
AC_OUTPUT
echo "
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The following features are enabled:
----- PLATFORM ----------------------------------------
- architecture (build) : $build_cpu
- os (build) : $build_os
- architecture (target) : $target_cpu
- os (target) : $target_os
- compiler vendor : ${ax_cv_cxx_compiler_vendor}
- compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
- SIMD : ${ac_SIMD}
- Threading : ${ac_openmp}
- Communications type : ${ac_COMMS}
- Default precision : ${ac_PRECISION}
- RNG choice : ${ac_RNG}
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
- LAPACK : ${ac_LAPACK}
- FFTW : ${ac_fftw}
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
- Supported SIMD flags : $SIMD_FLAGS
----------------------------------------------------------
- enabled simd support : ${ac_SIMD} (config macro says supported: $supported )
- communications type : ${ac_COMMS}
- default precision : ${ac_PRECISION}
- RNG choice : ${ac_RNG}
- LAPACK : ${ac_LAPACK}
----- BUILD FLAGS -------------------------------------
- CXXFLAGS: "${AM_CXXFLAGS} ${CXXFLAGS}"
- LDFLAGS: "${AM_LDFLAGS} ${LDFLAGS}"
- LIBS: "${LIBS} "
-------------------------------------------------------
"

276
lib/FFT.h Normal file
View File

@ -0,0 +1,276 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Cshift.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef _GRID_FFT_H_
#define _GRID_FFT_H_
#ifdef HAVE_FFTW
#include <fftw3.h>
#endif
namespace Grid {
template<class scalar> struct FFTW { };
#ifdef HAVE_FFTW
template<> struct FFTW<ComplexD> {
public:
typedef fftw_complex FFTW_scalar;
typedef fftw_plan FFTW_plan;
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
FFTW_scalar *in, const int *inembed,
int istride, int idist,
FFTW_scalar *out, const int *onembed,
int ostride, int odist,
int sign, unsigned flags) {
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
}
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
::fftw_flops(p,add,mul,fmas);
}
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
::fftw_execute_dft(p,in,out);
}
inline static void fftw_destroy_plan(const FFTW_plan p) {
::fftw_destroy_plan(p);
}
};
template<> struct FFTW<ComplexF> {
public:
typedef fftwf_complex FFTW_scalar;
typedef fftwf_plan FFTW_plan;
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
FFTW_scalar *in, const int *inembed,
int istride, int idist,
FFTW_scalar *out, const int *onembed,
int ostride, int odist,
int sign, unsigned flags) {
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
}
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
::fftwf_flops(p,add,mul,fmas);
}
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
::fftwf_execute_dft(p,in,out);
}
inline static void fftw_destroy_plan(const FFTW_plan p) {
::fftwf_destroy_plan(p);
}
};
#endif
#ifndef FFTW_FORWARD
#define FFTW_FORWARD (-1)
#define FFTW_BACKWARD (+1)
#endif
class FFT {
private:
GridCartesian *vgrid;
GridCartesian *sgrid;
int Nd;
double flops;
double flops_call;
uint64_t usec;
std::vector<int> dimensions;
std::vector<int> processors;
std::vector<int> processor_coor;
public:
static const int forward=FFTW_FORWARD;
static const int backward=FFTW_BACKWARD;
double Flops(void) {return flops;}
double MFlops(void) {return flops/usec;}
FFT ( GridCartesian * grid ) :
vgrid(grid),
Nd(grid->_ndimension),
dimensions(grid->_fdimensions),
processors(grid->_processors),
processor_coor(grid->_processor_coor)
{
flops=0;
usec =0;
std::vector<int> layout(Nd,1);
sgrid = new GridCartesian(dimensions,layout,processors);
};
~FFT ( void) {
delete sgrid;
}
template<class vobj>
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){
conformable(result._grid,vgrid);
conformable(source._grid,vgrid);
int L = vgrid->_ldimensions[dim];
int G = vgrid->_fdimensions[dim];
std::vector<int> layout(Nd,1);
std::vector<int> pencil_gd(vgrid->_fdimensions);
pencil_gd[dim] = G*processors[dim];
// Pencil global vol LxLxGxLxL per node
GridCartesian pencil_g(pencil_gd,layout,processors);
// Construct pencils
typedef typename vobj::scalar_object sobj;
typedef typename sobj::scalar_type scalar;
Lattice<vobj> ssource(vgrid); ssource =source;
Lattice<sobj> pgsource(&pencil_g);
Lattice<sobj> pgresult(&pencil_g); pgresult=zero;
#ifndef HAVE_FFTW
assert(0);
#else
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
{
int Ncomp = sizeof(sobj)/sizeof(scalar);
int Nlow = 1;
for(int d=0;d<dim;d++){
Nlow*=vgrid->_ldimensions[d];
}
int rank = 1; /* 1d transforms */
int n[] = {G}; /* 1d transforms of length G */
int howmany = Ncomp;
int odist,idist,istride,ostride;
idist = odist = 1; /* Distance between consecutive FT's */
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
int *inembed = n, *onembed = n;
int sign = FFTW_FORWARD;
if (inverse) sign = FFTW_BACKWARD;
FFTW_plan p;
{
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0];
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0];
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
in,inembed,
istride,idist,
out,onembed,
ostride, odist,
sign,FFTW_ESTIMATE);
}
double add,mul,fma;
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
flops_call = add+mul+2.0*fma;
GridStopWatch timer;
// Barrel shift and collect global pencil
for(int p=0;p<processors[dim];p++) {
for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor);
sobj s;
peekLocalSite(s,ssource,lcoor);
lcoor[dim]+=p*L;
pokeLocalSite(s,pgsource,lcoor);
}
ssource = Cshift(ssource,dim,L);
}
// Loop over orthog coords
int NN=pencil_g.lSites();
GridStopWatch Timer;
Timer.Start();
PARALLEL_FOR_LOOP
for(int idx=0;idx<NN;idx++) {
std::vector<int> lcoor(Nd);
pencil_g.LocalIndexToLocalCoor(idx,lcoor);
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx];
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx];
FFTW<scalar>::fftw_execute_dft(p,in,out);
}
}
Timer.Stop();
usec += Timer.useconds();
flops+= flops_call*NN;
int pc = processor_coor[dim];
for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor);
std::vector<int> gcoor = lcoor;
// extract the result
sobj s;
gcoor[dim] = lcoor[dim]+L*pc;
peekLocalSite(s,pgresult,gcoor);
pokeLocalSite(s,result,lcoor);
}
FFTW<scalar>::fftw_destroy_plan(p);
}
#endif
}
};
}
#endif

View File

@ -68,6 +68,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/Simd.h>
#include <Grid/Threads.h>
#include <Grid/Lexicographic.h>
#include <Grid/Init.h>
#include <Grid/Communicator.h>
#include <Grid/Cartesian.h>
#include <Grid/Tensors.h>
@ -78,7 +79,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/qcd/QCD.h>
#include <Grid/parallelIO/NerscIO.h>
#include <Grid/Init.h>
#include <Grid/FFT.h>
#include <Grid/qcd/hmc/NerscCheckpointer.h>
#include <Grid/qcd/hmc/HmcRunner.h>

View File

@ -153,6 +153,7 @@ void GridParseLayout(char **argv,int argc,
assert(ompthreads.size()==1);
GridThread::SetThreads(ompthreads[0]);
}
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
std::vector<int> cores(0);
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
@ -203,7 +204,6 @@ void Grid_init(int *argc,char ***argv)
GridLogConfigure(logstreams);
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
Grid_debug_handler_init();
}

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,3 @@
# additional include paths necessary to compile the C++ library
AM_CXXFLAGS = -I$(top_srcdir)/include/
extra_sources=
if BUILD_COMMS_MPI
extra_sources+=communicator/Communicator_mpi.cc
@ -20,16 +17,8 @@ endif
include Make.inc
include Eigen.inc
lib_LIBRARIES = libGrid.a
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
fftwdir = $(prefix)/lib/
fftw_DATA = libfftw3.a
#
# Include files
#
otherincludedir = $(includedir)/Grid
nobase_otherinclude_HEADERS =$(HFILES) $(EFILES) fftw3.h Config.h
lib_LTLIBRARIES = libGrid.la
libGrid_la_SOURCES = $(CCFILES) $(extra_sources)
libGrid_ladir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h

View File

@ -18,10 +18,10 @@
#include <stddef.h>
#include <Config.h>
#ifdef HAVE_GMP_H
#ifdef HAVE_LIBGMP
#include "bigfloat.h"
#else
#include "algorithms/approx/bigfloat_double.h"
#include "bigfloat_double.h"
#endif
#define JMAX 10000 //Maximum number of iterations of Newton's approximation

View File

@ -127,21 +127,12 @@ class CartesianCommunicator {
int recv_from_rank,
int bytes);
void SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list);
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
////////////////////////////////////////////////////////////

View File

@ -144,28 +144,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
}
// Basic Halo comms primitive
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Send_init(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Recv_init(recv, bytes, MPI_CHAR,dest,_processor,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
MPI_Startall(list.size(),&list[0]);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
@ -173,12 +151,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int from,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
SendToRecvFromInit(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromBegin(reqs);
for(int i=0;i<reqs.size();i++){
list.push_back(reqs[i]);
}
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{

View File

@ -84,19 +84,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
assert(0);

View File

@ -268,10 +268,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
assert(0); //unimplemented
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
@ -284,15 +280,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
shmem_putmem(recv,xmit,bytes,dest);
}
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
assert(0); // Unimplemented
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
// shmem_quiet(); // I'm done

View File

@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
}
PARALLEL_FOR_LOOP
//PARALLEL_FOR_LOOP
for(int idx=0;idx<ig->lSites();idx++){
std::vector<int> lcoor(ni);
ig->LocalIndexToLocalCoor(idx,lcoor);
@ -446,6 +446,79 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
}
template<class vobj>
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
sobj s;
GridBase *lg = lowDim._grid;
GridBase *hg = higherDim._grid;
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl == nh);
assert(orthog<nh);
assert(orthog>=0);
for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
// the above should guarantee that the operations are local
//PARALLEL_FOR_LOOP
for(int idx=0;idx<lg->lSites();idx++){
std::vector<int> lcoor(nl);
std::vector<int> hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
if( lcoor[orthog] == slice_lo ) {
hcoor=lcoor;
hcoor[orthog] = slice_hi;
peekLocalSite(s,lowDim,lcoor);
pokeLocalSite(s,higherDim,hcoor);
}
}
}
template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
sobj s;
GridBase *lg = lowDim._grid;
GridBase *hg = higherDim._grid;
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl == nh);
assert(orthog<nh);
assert(orthog>=0);
for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
// the above should guarantee that the operations are local
//PARALLEL_FOR_LOOP
for(int idx=0;idx<lg->lSites();idx++){
std::vector<int> lcoor(nl);
std::vector<int> hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
if( lcoor[orthog] == slice_lo ) {
hcoor=lcoor;
hcoor[orthog] = slice_hi;
peekLocalSite(s,higherDim,hcoor);
pokeLocalSite(s,lowDim,lcoor);
}
}
}
template<class vobj>
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{

View File

@ -111,6 +111,8 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#define FermOp4dVecTemplateInstantiate(A) \
template class A<WilsonImplF>; \
template class A<WilsonImplD>; \
template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>;
@ -120,7 +122,9 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#define FermOp5dVecTemplateInstantiate(A) \
template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>;
template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>;
#define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \
@ -143,6 +147,7 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
@ -185,6 +190,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;

View File

@ -54,18 +54,18 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag (Ls,1.0);
std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<RealD> lower(Ls,-1.0); lower[0] =mass;
std::vector<Coeff_t> diag (Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
M5D(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<RealD> diag = bs;
std::vector<RealD> upper= cs;
std::vector<RealD> lower= cs;
std::vector<Coeff_t> diag = bs;
std::vector<Coeff_t> upper= cs;
std::vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,Din,lower,diag,upper);
@ -73,9 +73,9 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = beo;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
std::vector<Coeff_t> diag = beo;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i];
lower[i]=-ceo[i];
@ -88,9 +88,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = bee;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-cee[i];
lower[i]=-cee[i];
@ -104,9 +104,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = bee;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
@ -129,9 +129,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag(Ls,1.0);
std::vector<RealD> upper(Ls,-1.0);
std::vector<RealD> lower(Ls,-1.0);
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0);
std::vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper);
@ -141,9 +141,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<RealD> diag =bs;
std::vector<RealD> upper=cs;
std::vector<RealD> lower=cs;
std::vector<Coeff_t> diag =bs;
std::vector<Coeff_t> upper=cs;
std::vector<Coeff_t> lower=cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,psi,Din,lower,diag,upper);
@ -273,11 +273,21 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
SetCoefficientsZolotarev(1.0,zdata,b,c);
std::vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(1.0,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
std::vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(zolo_hi,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
{
int Ls=this->Ls;
@ -315,7 +325,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
double bmc = b-c;
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);
}
@ -377,7 +387,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
}
{
double delta_d=mass*cee[Ls-1];
Coeff_t delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d;
}

View File

@ -62,16 +62,16 @@ namespace Grid {
void M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper);
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
void M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper);
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
virtual void Instantiatable(void)=0;
@ -91,23 +91,23 @@ namespace Grid {
RealD mass;
// Cayley form Moebius (tanh and zolotarev)
std::vector<RealD> omega;
std::vector<RealD> bs; // S dependent coeffs
std::vector<RealD> cs;
std::vector<RealD> as;
std::vector<Coeff_t> omega;
std::vector<Coeff_t> bs; // S dependent coeffs
std::vector<Coeff_t> cs;
std::vector<Coeff_t> as;
// For preconditioning Cayley form
std::vector<RealD> bee;
std::vector<RealD> cee;
std::vector<RealD> aee;
std::vector<RealD> beo;
std::vector<RealD> ceo;
std::vector<RealD> aeo;
std::vector<Coeff_t> bee;
std::vector<Coeff_t> cee;
std::vector<Coeff_t> aee;
std::vector<Coeff_t> beo;
std::vector<Coeff_t> ceo;
std::vector<Coeff_t> aeo;
// LDU factorisation of the eeoo matrix
std::vector<RealD> lee;
std::vector<RealD> leem;
std::vector<RealD> uee;
std::vector<RealD> ueem;
std::vector<RealD> dee;
std::vector<Coeff_t> lee;
std::vector<Coeff_t> leem;
std::vector<Coeff_t> uee;
std::vector<Coeff_t> ueem;
std::vector<Coeff_t> dee;
// Constructors
CayleyFermion5D(GaugeField &_Umu,
@ -117,20 +117,19 @@ namespace Grid {
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
protected:
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
};
}
}
#define INSTANTIATE_DPERP(A)\
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);

View File

@ -43,9 +43,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls =this->Ls;
GridBase *grid=psi._grid;
@ -82,9 +82,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls =this->Ls;
GridBase *grid=psi._grid;
@ -204,6 +204,8 @@ PARALLEL_FOR_LOOP
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
#endif
}}

View File

@ -43,9 +43,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
@ -65,9 +65,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls=this->Ls;
for(int s=0;s<Ls;s++){

View File

@ -53,9 +53,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi._grid;
int Ls = this->Ls;
@ -121,9 +121,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi._grid;
int Ls = this->Ls;
@ -194,8 +194,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
chi.checkerboard=psi.checkerboard;
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = bee[s];
@ -212,8 +212,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
Pplus (0,Ls-1) = mass*cee[0];
Pminus(Ls-1,0) = mass*cee[Ls-1];
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
if ( inv ) {
PplusMat =Pplus.inverse();
@ -298,8 +298,12 @@ PARALLEL_FOR_LOOP
INSTANTIATE_DPERP(DomainWallVec5dImplD);
INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}}

View File

@ -103,7 +103,7 @@ namespace Grid {
typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; \
typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \
INHERIT_FIMPL_TYPES(Base)
@ -122,9 +122,9 @@ namespace Grid {
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
@ -211,10 +211,9 @@ namespace Grid {
static const int Dimension = Nrepresentation;
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
@ -312,7 +311,7 @@ namespace Grid {
static const int Dimension = Nrepresentation;
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
@ -515,6 +514,7 @@ namespace Grid {
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double

View File

@ -38,90 +38,6 @@ int WilsonKernelsStatic::AsmOpt;
template <class Impl>
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
/*
template <class Impl>
typename std::enable_if<Impl::Dimension == 3>::type WilsonKernels<Impl>::DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns, in,
out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU, in,
out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
}
template <class Impl>
typename std::enable_if<Impl::Dimension != 3>::type WilsonKernels<Impl>::DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
out);
sF++;
}
sU++;
}
}
template<class Impl>
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out,
typename std::enable_if<Impl::Dimension == 3, int>::type = 0)
{
// No asm implementation yet.
// if ( AsmOpt ) WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
// else
for(int site=0;site<Ns;site++) {
for(int s=0;s<Ls;s++) {
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
}
}
template <class Impl>
void WilsonKernels<Impl>::DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
int sU, int Ls, int Ns, const FermionField &in, FermionField &out,
typename std::enable_if<Impl::Dimension != 3, int>::type = 0) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
*/
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////

View File

@ -33,26 +33,27 @@ directory
namespace Grid {
namespace QCD {
namespace QCD {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class WilsonKernelsStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static int AsmOpt; // these are a temporary hack
static int HandOpt; // these are a temporary hack
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class WilsonKernelsStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static int AsmOpt; // these are a temporary hack
static int HandOpt; // these are a temporary hack
};
template <class Impl>
class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
public:
public:
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
DiracOptDhopSite(
@ -102,35 +103,45 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
}
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc== 3 && EnableBool, void>::type
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
// No asm implementation yet.
// if ( AsmOpt )
// WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
// else
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
sU, in, out);
sF++;
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
Ns, in, out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
sU, in, out);
sF++;
}
sU++;
}
sU++;
}
}
template <bool EnableBool = true>
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
typename std::enable_if<
(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
@ -140,7 +151,7 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
sU++;
}
}
void DiracOptDhopDir(
StencilImpl &st, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
@ -165,6 +176,12 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptAsmDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptHandDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
@ -177,7 +194,9 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
public:
WilsonKernels(const ImplParams &p = ImplParams());
};
}
};
}
}
#endif

View File

@ -73,12 +73,21 @@ static int signInit = setupSigns();
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define FX(A) WILSONASM_ ##A
#undef KERNEL_DAG
template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef VMOVIDUP
#undef VMOVRDUP
#undef MAYBEPERM
@ -89,14 +98,25 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
#undef KERNEL_DAG
template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#endif
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);

View File

@ -30,7 +30,11 @@
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_PROJMEM(base);
#else
XM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm);
} else {
LOAD_CHI(base);
@ -41,15 +45,22 @@
MULT_2SPIN_DIR_PFXP(Xp,basep);
}
LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_RECON;
#else
XM_RECON;
#endif
////////////////////////////////
// Yp
////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_PROJMEM(base);
#else
YM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm);
} else {
LOAD_CHI(base);
@ -60,7 +71,11 @@
MULT_2SPIN_DIR_PFYP(Yp,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_RECON_ACCUM;
#else
YM_RECON_ACCUM;
#endif
////////////////////////////////
// Zp
@ -68,7 +83,11 @@
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_PROJMEM(base);
#else
ZM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm);
} else {
LOAD_CHI(base);
@ -79,7 +98,11 @@
MULT_2SPIN_DIR_PFZP(Zp,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_RECON_ACCUM;
#else
ZM_RECON_ACCUM;
#endif
////////////////////////////////
// Tp
@ -87,7 +110,11 @@
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_PROJMEM(base);
#else
TM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm);
} else {
LOAD_CHI(base);
@ -98,7 +125,11 @@
MULT_2SPIN_DIR_PFTP(Tp,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_RECON_ACCUM;
#else
TM_RECON_ACCUM;
#endif
////////////////////////////////
// Xm
@ -107,7 +138,11 @@
// basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_PROJMEM(base);
#else
XP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm);
} else {
LOAD_CHI(base);
@ -118,7 +153,11 @@
MULT_2SPIN_DIR_PFXM(Xm,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_RECON_ACCUM;
#else
XP_RECON_ACCUM;
#endif
////////////////////////////////
// Ym
@ -126,7 +165,11 @@
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_PROJMEM(base);
#else
YP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm);
} else {
LOAD_CHI(base);
@ -137,7 +180,11 @@
MULT_2SPIN_DIR_PFYM(Ym,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_RECON_ACCUM;
#else
YP_RECON_ACCUM;
#endif
////////////////////////////////
// Zm
@ -145,7 +192,11 @@
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_PROJMEM(base);
#else
ZP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm);
} else {
LOAD_CHI(base);
@ -156,7 +207,11 @@
MULT_2SPIN_DIR_PFZM(Zm,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_RECON_ACCUM;
#else
ZP_RECON_ACCUM;
#endif
////////////////////////////////
// Tm
@ -164,7 +219,11 @@
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_PROJMEM(base);
#else
TP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm);
} else {
LOAD_CHI(base);
@ -175,7 +234,11 @@
MULT_2SPIN_DIR_PFTM(Tm,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_RECON_ACCUM;
#else
TP_RECON_ACCUM;
#endif
basep= st.GetPFInfo(nent,plocal); nent++;
SAVE_RESULT(base,basep);

View File

@ -839,46 +839,23 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
////////////// Wilson ; uses this implementation /////////////////////
// Need Nc=3 though //
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD);
INSTANTIATE_THEM(ZWilsonImplF);
INSTANTIATE_THEM(ZWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(DomainWallVec5dImplF);
INSTANTIATE_THEM(DomainWallVec5dImplD);
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
}}

View File

@ -0,0 +1,79 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusFermion.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
#define GRID_QCD_ZMOBIUS_FERMION_H
#include <Grid/Grid.h>
namespace Grid {
namespace QCD {
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
std::vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
};
}
}
#endif

View File

@ -2,7 +2,7 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_empty.h
Source file: ./lib/simd/Grid_generic.h
Copyright (C) 2015
@ -26,14 +26,6 @@ Author: neo <cossu@post.kek.jp>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
//----------------------------------------------------------------------
/*! @file Grid_sse4.h
@brief Empty Optimization libraries for debugging
Using intrinsics
*/
// Time-stamp: <2015-06-09 14:28:02 neo>
//----------------------------------------------------------------------
namespace Grid {
namespace Optimization {

View File

@ -38,8 +38,8 @@ directory
#ifndef GRID_VECTOR_TYPES
#define GRID_VECTOR_TYPES
#ifdef EMPTY_SIMD
#include "Grid_empty.h"
#ifdef GENERIC_VEC
#include "Grid_generic.h"
#endif
#ifdef SSE4
#include "Grid_sse4.h"
@ -388,6 +388,12 @@ class Grid_simd {
}; // end of Grid_simd class definition
inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; }
inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; }
inline void permute(RealD &y,RealD b, int perm) { y=b; }
inline void permute(RealF &y,RealF b, int perm) { y=b; }
////////////////////////////////////////////////////////////////////
// General rotate
////////////////////////////////////////////////////////////////////

View File

@ -67,15 +67,13 @@ template <class scalar>
struct AsinRealFunctor {
scalar operator()(const scalar &a) const { return asin(real(a)); }
};
template <class scalar>
struct LogRealFunctor {
scalar operator()(const scalar &a) const { return log(real(a)); }
};
template <class scalar>
struct ExpRealFunctor {
scalar operator()(const scalar &a) const { return exp(real(a)); }
struct ExpFunctor {
scalar operator()(const scalar &a) const { return exp(a); }
};
template <class scalar>
struct NotFunctor {
@ -85,7 +83,6 @@ template <class scalar>
struct AbsRealFunctor {
scalar operator()(const scalar &a) const { return std::abs(real(a)); }
};
template <class scalar>
struct PowRealFunctor {
double y;
@ -135,7 +132,6 @@ template <class Scalar>
inline Scalar rsqrt(const Scalar &r) {
return (RSqrtRealFunctor<Scalar>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
return SimdApply(CosRealFunctor<S>(), r);
@ -162,7 +158,7 @@ inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
}
template <class S, class V>
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
return SimdApply(ExpRealFunctor<S>(), r);
return SimdApply(ExpFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {

87
m4/ax_compiler_vendor.m4 Normal file
View File

@ -0,0 +1,87 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_compiler_vendor.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_COMPILER_VENDOR
#
# DESCRIPTION
#
# Determine the vendor of the C/C++ compiler, e.g., gnu, intel, ibm, sun,
# hp, borland, comeau, dec, cray, kai, lcc, metrowerks, sgi, microsoft,
# watcom, etc. The vendor is returned in the cache variable
# $ax_cv_c_compiler_vendor for C and $ax_cv_cxx_compiler_vendor for C++.
#
# LICENSE
#
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
# Copyright (c) 2008 Matteo Frigo
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 15
AC_DEFUN([AX_COMPILER_VENDOR],
[AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor,
dnl Please add if possible support to ax_compiler_version.m4
[# note: don't check for gcc first since some other compilers define __GNUC__
vendors="intel: __ICC,__ECC,__INTEL_COMPILER
ibm: __xlc__,__xlC__,__IBMC__,__IBMCPP__
pathscale: __PATHCC__,__PATHSCALE__
clang: __clang__
cray: _CRAYC
fujitsu: __FUJITSU
gnu: __GNUC__
sun: __SUNPRO_C,__SUNPRO_CC
hp: __HP_cc,__HP_aCC
dec: __DECC,__DECCXX,__DECC_VER,__DECCXX_VER
borland: __BORLANDC__,__CODEGEARC__,__TURBOC__
comeau: __COMO__
kai: __KCC
lcc: __LCC__
sgi: __sgi,sgi
microsoft: _MSC_VER
metrowerks: __MWERKS__
watcom: __WATCOMC__
portland: __PGI
tcc: __TINYC__
unknown: UNKNOWN"
for ventest in $vendors; do
case $ventest in
*:) vendor=$ventest; continue ;;
*) vencpp="defined("`echo $ventest | sed 's/,/) || defined(/g'`")" ;;
esac
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[
#if !($vencpp)
thisisanerror;
#endif
])], [break])
done
ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=`echo $vendor | cut -d: -f1`
])
])

492
m4/ax_compiler_version.m4 Normal file
View File

@ -0,0 +1,492 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_compiler_version.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_COMPILER_VERSION
#
# DESCRIPTION
#
# This macro retrieves the compiler version and returns it in the cache
# variable $ax_cv_c_compiler_version for C and $ax_cv_cxx_compiler_version
# for C++.
#
# Version is returned as epoch:major.minor.patchversion
#
# Epoch is used in order to have an increasing version number in case of
# marketing change.
#
# Epoch use: * borland compiler use chronologically 0turboc for turboc
# era,
#
# 1borlanc BORLANC++ before 5, 2cppbuilder for cppbuilder era,
# 3borlancpp for return of BORLANC++ (after version 5.5),
# 4cppbuilder for cppbuilder with year version,
# and 5xe for XE era.
#
# An empty string is returned otherwise.
#
# LICENSE
#
# Copyright (c) 2014 Bastien ROUCARIES <roucaries.bastien+autoconf@gmail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 4
# for intel
AC_DEFUN([_AX_COMPILER_VERSION_INTEL],
[ dnl
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
[__INTEL_COMPILER/100],,
AC_MSG_FAILURE([[[$0]] unknown intel compiler version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
[(__INTEL_COMPILER%100)/10],,
AC_MSG_FAILURE([[[$0]] unknown intel compiler version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[(__INTEL_COMPILER%10)],,
AC_MSG_FAILURE([[[$0]] unknown intel compiler version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
# for IBM
AC_DEFUN([_AX_COMPILER_VERSION_IBM],
[ dnl
dnl check between z/OS C/C++ and XL C/C++
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([],
[
#if defined(__COMPILER_VER__)
choke me;
#endif
])],
[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
[__xlC__/100],,
AC_MSG_FAILURE([[[$0]] unknown IBM compiler major version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
[__xlC__%100],,
AC_MSG_FAILURE([[[$0]] unknown IBM compiler minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[__xlC_ver__/0x100],,
AC_MSG_FAILURE([[[$0]] unknown IBM compiler patch version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_build,
[__xlC_ver__%0x100],,
AC_MSG_FAILURE([[[$0]] unknown IBM compiler build version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_build"
],
[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[__xlC__%1000],,
AC_MSG_FAILURE([[[$0]] unknown IBM compiler patch version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
[(__xlC__/10000)%10],,
AC_MSG_FAILURE([[[$0]] unknown IBM compiler minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
[(__xlC__/100000)%10],,
AC_MSG_FAILURE([[[$0]] unknown IBM compiler major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
])
# for pathscale
AC_DEFUN([_AX_COMPILER_VERSION_PATHSCALE],[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
__PATHCC__,,
AC_MSG_FAILURE([[[$0]] unknown pathscale major]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
__PATHCC_MINOR__,,
AC_MSG_FAILURE([[[$0]] unknown pathscale minor]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[__PATHCC_PATCHLEVEL__],,
AC_MSG_FAILURE([[[$0]] unknown pathscale patch level]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
# for clang
AC_DEFUN([_AX_COMPILER_VERSION_CLANG],[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
__clang_major__,,
AC_MSG_FAILURE([[[$0]] unknown clang major]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
__clang_minor__,,
AC_MSG_FAILURE([[[$0]] unknown clang minor]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[__clang_patchlevel__],,0)
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
# for crayc
AC_DEFUN([_AX_COMPILER_VERSION_CRAY],[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
_RELEASE,,
AC_MSG_FAILURE([[[$0]] unknown crayc release]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
_RELEASE_MINOR,,
AC_MSG_FAILURE([[[$0]] unknown crayc minor]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor"
])
# for fujitsu
AC_DEFUN([_AX_COMPILER_VERSION_FUJITSU],[
AC_COMPUTE_INT(ax_cv_[]_AC_LANG_ABBREV[]_compiler_version,
__FCC_VERSION,,
AC_MSG_FAILURE([[[$0]]unknown fujitsu release]))
])
# for GNU
AC_DEFUN([_AX_COMPILER_VERSION_GNU],[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
__GNUC__,,
AC_MSG_FAILURE([[[$0]] unknown gcc major]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
__GNUC_MINOR__,,
AC_MSG_FAILURE([[[$0]] unknown gcc minor]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[__GNUC_PATCHLEVEL__],,
AC_MSG_FAILURE([[[$0]] unknown gcc patch level]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
# For sun
AC_DEFUN([_AX_COMPILER_VERSION_SUN],[
m4_define([_AX_COMPILER_VERSION_SUN_NUMBER],
[
#if defined(__SUNPRO_CC)
__SUNPRO_CC
#else
__SUNPRO_C
#endif
])
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_until59,
!!(_AX_COMPILER_VERSION_SUN_NUMBER < 0x1000),,
AC_MSG_FAILURE([[[$0]] unknown sun release version]))
AS_IF([test "X$_ax_[]_AC_LANG_ABBREV[]_compiler_version_until59" = X1],
[dnl
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
_AX_COMPILER_VERSION_SUN_NUMBER % 0x10,,
AC_MSG_FAILURE([[[$0]] unknown sun patch version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
(_AX_COMPILER_VERSION_SUN_NUMBER / 0x10) % 0x10,,
AC_MSG_FAILURE([[[$0]] unknown sun minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
(_AX_COMPILER_VERSION_SUN_NUMBER / 0x100),,
AC_MSG_FAILURE([[[$0]] unknown sun major version]))
],
[dnl
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
_AX_COMPILER_VERSION_SUN_NUMBER % 0x10,,
AC_MSG_FAILURE([[[$0]] unknown sun patch version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
(_AX_COMPILER_VERSION_SUN_NUMBER / 0x100) % 0x100,,
AC_MSG_FAILURE([[[$0]] unknown sun minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
(_AX_COMPILER_VERSION_SUN_NUMBER / 0x1000),,
AC_MSG_FAILURE([[[$0]] unknown sun major version]))
])
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
AC_DEFUN([_AX_COMPILER_VERSION_HP],[
m4_define([_AX_COMPILER_VERSION_HP_NUMBER],
[
#if defined(__HP_cc)
__HP_cc
#else
__HP_aCC
#endif
])
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_untilA0121,
!!(_AX_COMPILER_VERSION_HP_NUMBER <= 1),,
AC_MSG_FAILURE([[[$0]] unknown hp release version]))
AS_IF([test "X$_ax_[]_AC_LANG_ABBREV[]_compiler_version_untilA0121" = X1],
[dnl By default output last version with this behavior.
dnl it is so old
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="01.21.00"
],
[dnl
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
(_AX_COMPILER_VERSION_HP_NUMBER % 100),,
AC_MSG_FAILURE([[[$0]] unknown hp release version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
((_AX_COMPILER_VERSION_HP_NUMBER / 100)%100),,
AC_MSG_FAILURE([[[$0]] unknown hp minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
((_AX_COMPILER_VERSION_HP_NUMBER / 10000)%100),,
AC_MSG_FAILURE([[[$0]] unknown hp major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
])
AC_DEFUN([_AX_COMPILER_VERSION_DEC],[dnl
m4_define([_AX_COMPILER_VERSION_DEC_NUMBER],
[
#if defined(__DECC_VER)
__DECC_VER
#else
__DECCXX_VER
#endif
])
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
(_AX_COMPILER_VERSION_DEC_NUMBER % 10000),,
AC_MSG_FAILURE([[[$0]] unknown dec release version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
((_AX_COMPILER_VERSION_DEC_NUMBER / 100000UL)%100),,
AC_MSG_FAILURE([[[$0]] unknown dec minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
((_AX_COMPILER_VERSION_DEC_NUMBER / 10000000UL)%100),,
AC_MSG_FAILURE([[[$0]] unknown dec major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
# borland
AC_DEFUN([_AX_COMPILER_VERSION_BORLAND],[dnl
m4_define([_AX_COMPILER_VERSION_TURBOC_NUMBER],
[
#if defined(__TURBOC__)
__TURBOC__
#else
choke me
#endif
])
m4_define([_AX_COMPILER_VERSION_BORLANDC_NUMBER],
[
#if defined(__BORLANDC__)
__BORLANDC__
#else
__CODEGEARC__
#endif
])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM(,
_AX_COMPILER_VERSION_TURBOC_NUMBER)],
[dnl TURBOC
AC_COMPUTE_INT(
_ax_[]_AC_LANG_ABBREV[]_compiler_version_turboc_raw,
_AX_COMPILER_VERSION_TURBOC_NUMBER,,
AC_MSG_FAILURE([[[$0]] unknown turboc version]))
AS_IF(
[test $_ax_[]_AC_LANG_ABBREV[]_compiler_version_turboc_raw -lt 661 || test $_ax_[]_AC_LANG_ABBREV[]_compiler_version_turboc_raw -gt 1023],
[dnl compute normal version
AC_COMPUTE_INT(
_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
_AX_COMPILER_VERSION_TURBOC_NUMBER % 0x100,,
AC_MSG_FAILURE([[[$0]] unknown turboc minor version]))
AC_COMPUTE_INT(
_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
(_AX_COMPILER_VERSION_TURBOC_NUMBER/0x100)%0x100,,
AC_MSG_FAILURE([[[$0]] unknown turboc major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="0turboc:$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor"],
[dnl special version
AS_CASE([$_ax_[]_AC_LANG_ABBREV[]_compiler_version_turboc_raw],
[661],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="0turboc:1.00"],
[662],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="0turboc:1.01"],
[663],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="0turboc:2.00"],
[
AC_MSG_WARN([[[$0]] unknown turboc version between 0x295 and 0x400 please report bug])
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version=""
])
])
],
# borlandc
[
AC_COMPUTE_INT(
_ax_[]_AC_LANG_ABBREV[]_compiler_version_borlandc_raw,
_AX_COMPILER_VERSION_BORLANDC_NUMBER,,
AC_MSG_FAILURE([[[$0]] unknown borlandc version]))
AS_CASE([$_ax_[]_AC_LANG_ABBREV[]_compiler_version_borlandc_raw],
dnl BORLANC++ before 5.5
[512] ,[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:2.00"],
[1024],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:3.00"],
[1024],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:3.00"],
[1040],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:3.1"],
[1106],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:4.0"],
[1280],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:5.0"],
[1312],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="1borlanc:5.02"],
dnl C++ Builder era
[1328],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="2cppbuilder:3.0"],
[1344],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="2cppbuilder:4.0"],
dnl BORLANC++ after 5.5
[1360],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="3borlancpp:5.5"],
[1361],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="3borlancpp:5.51"],
[1378],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="3borlancpp:5.6.4"],
dnl C++ Builder with year number
[1392],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="4cppbuilder:2006"],
[1424],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="4cppbuilder:2007"],
[1555],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="4cppbuilder:2009"],
[1569],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="4cppbuilder:2010"],
dnl XE version
[1584],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="5xe"],
[1600],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="5xe:2"],
[1616],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="5xe:3"],
[1632],[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="5xe:4"],
[
AC_MSG_WARN([[[$0]] Unknow borlanc compiler version $_ax_[]_AC_LANG_ABBREV[]_compiler_version_borlandc_raw please report bug])
])
])
])
# COMO
AC_DEFUN([_AX_COMPILER_VERSION_COMEAU],
[ dnl
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
[__COMO_VERSION__%100],,
AC_MSG_FAILURE([[[$0]] unknown comeau compiler minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
[(__COMO_VERSION__/100)%10],,
AC_MSG_FAILURE([[[$0]] unknown comeau compiler major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor"
])
# KAI
AC_DEFUN([_AX_COMPILER_VERSION_KAI],[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[__KCC_VERSION%100],,
AC_MSG_FAILURE([[[$0]] unknown kay compiler patch version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
[(__KCC_VERSION/100)%10],,
AC_MSG_FAILURE([[[$0]] unknown kay compiler minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
[(__KCC_VERSION/1000)%10],,
AC_MSG_FAILURE([[[$0]] unknown kay compiler major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
dnl LCC
dnl LCC does not output version...
# SGI
AC_DEFUN([_AX_COMPILER_VERSION_SGI],[
m4_define([_AX_COMPILER_VERSION_SGI_NUMBER],
[
#if defined(_COMPILER_VERSION)
_COMPILER_VERSION
#else
_SGI_COMPILER_VERSION
#endif
])
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[_AX_COMPILER_VERSION_SGI_NUMBER%10],,
AC_MSG_FAILURE([[[$0]] unknown SGI compiler patch version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
[(_AX_COMPILER_VERSION_SGI_NUMBER/10)%10],,
AC_MSG_FAILURE([[[$0]] unknown SGI compiler minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
[(_AX_COMPILER_VERSION_SGI_NUMBER/100)%10],,
AC_MSG_FAILURE([[[$0]] unknown SGI compiler major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
# microsoft
AC_DEFUN([_AX_COMPILER_VERSION_MICROSOFT],[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
_MSC_VER%100,,
AC_MSG_FAILURE([[[$0]] unknown microsoft compiler minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
(_MSC_VER/100)%100,,
AC_MSG_FAILURE([[[$0]] unknown microsoft compiler major version]))
dnl could be overriden
_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch=0
_ax_[]_AC_LANG_ABBREV[]_compiler_version_build=0
# special case for version 6
AS_IF([test "X$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major" = "X12"],
[AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
_MSC_FULL_VER%1000,,
_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch=0)])
# for version 7
AS_IF([test "X$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major" = "X13"],
[AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
_MSC_FULL_VER%1000,,
AC_MSG_FAILURE([[[$0]] unknown microsoft compiler patch version]))
])
# for version > 8
AS_IF([test $_ax_[]_AC_LANG_ABBREV[]_compiler_version_major -ge 14],
[AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
_MSC_FULL_VER%10000,,
AC_MSG_FAILURE([[[$0]] unknown microsoft compiler patch version]))
])
AS_IF([test $_ax_[]_AC_LANG_ABBREV[]_compiler_version_major -ge 15],
[AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_build,
_MSC_BUILD,,
AC_MSG_FAILURE([[[$0]] unknown microsoft compiler build version]))
])
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_build"
])
# for metrowerks
AC_DEFUN([_AX_COMPILER_VERSION_METROWERKS],[dnl
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
__MWERKS__%0x100,,
AC_MSG_FAILURE([[[$0]] unknown metrowerks compiler patch version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
(__MWERKS__/0x100)%0x10,,
AC_MSG_FAILURE([[[$0]] unknown metrowerks compiler minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
(__MWERKS__/0x1000)%0x10,,
AC_MSG_FAILURE([[[$0]] unknown metrowerks compiler major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
# for watcom
AC_DEFUN([_AX_COMPILER_VERSION_WATCOM],[dnl
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
__WATCOMC__%100,,
AC_MSG_FAILURE([[[$0]] unknown watcom compiler minor version]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
(__WATCOMC__/100)%100,,
AC_MSG_FAILURE([[[$0]] unknown watcom compiler major version]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor"
])
# for PGI
AC_DEFUN([_AX_COMPILER_VERSION_PORTLAND],[
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_major,
__PGIC__,,
AC_MSG_FAILURE([[[$0]] unknown pgi major]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor,
__PGIC_MINOR__,,
AC_MSG_FAILURE([[[$0]] unknown pgi minor]))
AC_COMPUTE_INT(_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch,
[__PGIC_PATCHLEVEL__],,
AC_MSG_FAILURE([[[$0]] unknown pgi patch level]))
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version="$_ax_[]_AC_LANG_ABBREV[]_compiler_version_major.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_minor.$_ax_[]_AC_LANG_ABBREV[]_compiler_version_patch"
])
# tcc
AC_DEFUN([_AX_COMPILER_VERSION_TCC],[
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version=[`tcc -v | $SED 's/^[ ]*tcc[ ]\+version[ ]\+\([0-9.]\+\).*/\1/g'`]
])
# main entry point
AC_DEFUN([AX_COMPILER_VERSION],[dnl
AC_REQUIRE([AX_COMPILER_VENDOR])
AC_REQUIRE([AC_PROG_SED])
AC_CACHE_CHECK([for _AC_LANG compiler version],
ax_cv_[]_AC_LANG_ABBREV[]_compiler_version,
[ dnl
AS_CASE([$ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor],
[intel],[_AX_COMPILER_VERSION_INTEL],
[ibm],[_AX_COMPILER_VERSION_IBM],
[pathscale],[_AX_COMPILER_VERSION_PATHSCALE],
[clang],[_AX_COMPILER_VERSION_CLANG],
[cray],[_AX_COMPILER_VERSION_CRAY],
[fujitsu],[_AX_COMPILER_VERSION_FUJITSU],
[gnu],[_AX_COMPILER_VERSION_GNU],
[sun],[_AX_COMPILER_VERSION_SUN],
[hp],[_AX_COMPILER_VERSION_HP],
[dec],[_AX_COMPILER_VERSION_DEC],
[borland],[_AX_COMPILER_VERSION_BORLAND],
[comeau],[_AX_COMPILER_VERSION_COMEAU],
[kai],[_AX_COMPILER_VERSION_KAI],
[sgi],[_AX_COMPILER_VERSION_SGI],
[microsoft],[_AX_COMPILER_VERSION_MICROSOFT],
[metrowerks],[_AX_COMPILER_VERSION_METROWERKS],
[watcom],[_AX_COMPILER_VERSION_WATCOM],
[portland],[_AX_COMPILER_VERSION_PORTLAND],
[tcc],[_AX_COMPILER_VERSION_TCC],
[ax_cv_[]_AC_LANG_ABBREV[]_compiler_version=""])
])
])

34
m4/ax_gcc_option.m4 Normal file
View File

@ -0,0 +1,34 @@
AC_DEFUN([AX_GCC_OPTION], [
AC_REQUIRE([AC_PROG_CC])
AC_MSG_CHECKING([if gcc accepts $1 option])
AS_IF([ test "x$GCC" = "xyes" ],[
AS_IF([ test -z "$3" ],[
ax_gcc_option_test="int main()
{
return 0;
}"
],[
ax_gcc_option_test="$3"
])
# Dump the test program to file
cat <<EOF > conftest.c
$ax_gcc_option_test
EOF
# Dump back the file to the log, useful for debugging purposes
AC_TRY_COMMAND(cat conftest.c 1>&AS_MESSAGE_LOG_FD)
AS_IF([ AC_TRY_COMMAND($CC $2 $1 -c conftest.c 1>&AS_MESSAGE_LOG_FD) ],[
AC_MSG_RESULT([yes])
$4
],[
AC_MSG_RESULT([no])
$5
])
],[
AC_MSG_RESULT([no gcc available])
])
])

64
m4/ax_gcc_version.m4 Normal file
View File

@ -0,0 +1,64 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_version.html
# ===========================================================================
#
# OBSOLETE MACRO
#
# Use AX_COMPILER_VERSION instead
#
# SYNOPSIS
#
# AX_GCC_VERSION
#
# DESCRIPTION
#
# This macro retrieves the gcc version and returns it in the GCC_VERSION
# variable if available, an empty string otherwise.
#
# LICENSE
#
# Copyright (c) 2009 Francesco Salvestrini <salvestrini@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 10
AC_DEFUN([AX_GCC_VERSION], [
AC_OBSOLETE([$0], [;please use AX_COMPILER_VERSION instead])
AC_LANG_PUSH([C])
AC_REQUIRE([AX_COMPILER_VENDOR])
AC_REQUIRE([AX_COMPILER_VERSION])
AC_LANG_POP([C])
GCC_VERSION=""
ax_cv_gcc_version=""
AS_IF([test "X$ax_cv_c_compiler_vendor" = "Xgnu"],
[dnl
ax_cv_gcc_version=$ax_cv_c_compiler_version
GCC_VERSION=$ax_cv_gcc_version
])
AC_SUBST([GCC_VERSION])
])

67
m4/ax_gxx_version.m4 Normal file
View File

@ -0,0 +1,67 @@
# ===========================================================================
# http://autoconf-archive.cryp.to/ax_gxx_version.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GXX_VERSION
#
# DESCRIPTION
#
# This macro retrieves the g++ version and returns it in the GXX_VERSION
# variable if available, an empty string otherwise.
#
# LAST MODIFICATION
#
# 2008-04-12
#
# COPYLEFT
#
# Copyright (c) 2008 Francesco Salvestrini <salvestrini@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Macro Archive. When you make and
# distribute a modified version of the Autoconf Macro, you may extend this
# special exception to the GPL to apply to your modified version as well.
AC_DEFUN([AX_GXX_VERSION], [
GXX_VERSION=""
AX_GCC_OPTION([-dumpversion],[],[],[
ax_gcc_version_option=yes
],[
ax_gcc_version_option=no
])
AS_IF([test "x$GXX" = "xyes"],[
AS_IF([test "x$ax_gxx_version_option" != "no"],[
AC_CACHE_CHECK([gxx version],[ax_cv_gxx_version],[
ax_cv_gxx_version="`$CXX -dumpversion`"
AS_IF([test "x$ax_cv_gxx_version" = "x"],[
ax_cv_gxx_version=""
])
])
GXX_VERSION=$ax_cv_gxx_version
])
])
AC_SUBST([GXX_VERSION])
])

203
m4/lx_find_mpi.m4 Normal file
View File

@ -0,0 +1,203 @@
#################################################################################################
# Copyright (c) 2010, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory
# Written by Todd Gamblin, tgamblin@llnl.gov.
# LLNL-CODE-417602
# All rights reserved.
#
# This file is part of Libra. For details, see http://github.com/tgamblin/libra.
# Please also read the LICENSE file for further information.
#
# Redistribution and use in source and binary forms, with or without modification, are
# permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this list of
# conditions and the disclaimer below.
# * Redistributions in binary form must reproduce the above copyright notice, this list of
# conditions and the disclaimer (as noted below) in the documentation and/or other materials
# provided with the distribution.
# * Neither the name of the LLNS/LLNL nor the names of its contributors may be used to endorse
# or promote products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#################################################################################################
#
# LX_FIND_MPI()
# ------------------------------------------------------------------------
# This macro finds an MPI compiler and extracts includes and libraries from
# it for use in automake projects. The script exports the following variables:
#
# AC_DEFINE variables:
# HAVE_MPI AC_DEFINE'd to 1 if we found MPI
#
# AC_SUBST variables:
# MPICC Name of MPI compiler
# MPI_CFLAGS Includes and defines for MPI C compilation
# MPI_CLDFLAGS Libraries and library paths for linking MPI C programs
#
# MPICXX Name of MPI C++ compiler
# MPI_CXXFLAGS Includes and defines for MPI C++ compilation
# MPI_CXXLDFLAGS Libraries and library paths for linking MPI C++ programs
#
# MPIF77 Name of MPI Fortran 77 compiler
# MPI_F77FLAGS Includes and defines for MPI Fortran 77 compilation
# MPI_F77LDFLAGS Libraries and library paths for linking MPI Fortran 77 programs
#
# MPIFC Name of MPI Fortran compiler
# MPI_FFLAGS Includes and defines for MPI Fortran compilation
# MPI_FLDFLAGS Libraries and library paths for linking MPI Fortran programs
#
# Shell variables output by this macro:
# have_C_mpi 'yes' if we found MPI for C, 'no' otherwise
# have_CXX_mpi 'yes' if we found MPI for C++, 'no' otherwise
# have_F77_mpi 'yes' if we found MPI for F77, 'no' otherwise
# have_F_mpi 'yes' if we found MPI for Fortran, 'no' otherwise
#
AC_DEFUN([LX_FIND_MPI],
[
AC_LANG_CASE(
[C], [
AC_REQUIRE([AC_PROG_CC])
if [[ ! -z "$MPICC" ]]; then
LX_QUERY_MPI_COMPILER(MPICC, [$MPICC], C)
else
LX_QUERY_MPI_COMPILER(MPICC, [mpicc mpiicc mpixlc mpipgcc], C)
fi
],
[C++], [
AC_REQUIRE([AC_PROG_CXX])
if [[ ! -z "$MPICXX" ]]; then
LX_QUERY_MPI_COMPILER(MPICXX, [$MPICXX], CXX)
else
LX_QUERY_MPI_COMPILER(MPICXX, [mpicxx mpiCC mpic++ mpig++ mpiicpc mpipgCC mpixlC], CXX)
fi
],
[F77], [
AC_REQUIRE([AC_PROG_F77])
if [[ ! -z "$MPIF77" ]]; then
LX_QUERY_MPI_COMPILER(MPIF77, [$MPIF77], F77)
else
LX_QUERY_MPI_COMPILER(MPIF77, [mpif77 mpiifort mpixlf77 mpixlf77_r], F77)
fi
],
[Fortran], [
AC_REQUIRE([AC_PROG_FC])
if [[ ! -z "$MPIFC" ]]; then
LX_QUERY_MPI_COMPILER(MPIFC, [$MPIFC], F)
else
mpi_default_fc="mpif95 mpif90 mpigfortran mpif2003"
mpi_intel_fc="mpiifort"
mpi_xl_fc="mpixlf95 mpixlf95_r mpixlf90 mpixlf90_r mpixlf2003 mpixlf2003_r"
mpi_pg_fc="mpipgf95 mpipgf90"
LX_QUERY_MPI_COMPILER(MPIFC, [$mpi_default_fc $mpi_intel_fc $mpi_xl_fc $mpi_pg_fc], F)
fi
])
])
#
# LX_QUERY_MPI_COMPILER([compiler-var-name], [compiler-names], [output-var-prefix])
# ------------------------------------------------------------------------
# AC_SUBST variables:
# MPI_<prefix>FLAGS Includes and defines for MPI compilation
# MPI_<prefix>LDFLAGS Libraries and library paths for linking MPI C programs
#
# Shell variables output by this macro:
# found_mpi_flags 'yes' if we were able to get flags, 'no' otherwise
#
AC_DEFUN([LX_QUERY_MPI_COMPILER],
[
# Try to find a working MPI compiler from the supplied names
AC_PATH_PROGS($1, [$2], [not-found])
# Figure out what the compiler responds to to get it to show us the compile
# and link lines. After this part of the macro, we'll have a valid
# lx_mpi_command_line
printf "checking whether $$1 responds to '-showme:compile'... "
lx_mpi_compile_line=`$$1 -showme:compile 2>/dev/null`
if [[ "$?" -eq 0 ]]; then
echo yes
lx_mpi_link_line=`$$1 -showme:link 2>/dev/null`
else
echo no
printf "checking whether $$1 responds to '-showme'... "
lx_mpi_command_line=`$$1 -showme 2>/dev/null`
if [[ "$?" -ne 0 ]]; then
echo no
printf "checking whether $$1 responds to '-compile-info'... "
lx_mpi_compile_line=`$$1 -compile-info 2>/dev/null`
if [[ "$?" -eq 0 ]]; then
echo yes
lx_mpi_link_line=`$$1 -link-info 2>/dev/null`
else
echo no
printf "checking whether $$1 responds to '-show'... "
lx_mpi_command_line=`$$1 -show 2>/dev/null`
if [[ "$?" -eq 0 ]]; then
echo yes
else
echo no
fi
fi
else
echo yes
fi
fi
if [[ ! -z "$lx_mpi_compile_line" -a ! -z "$lx_mpi_link_line" ]]; then
lx_mpi_command_line="$lx_mpi_compile_line $lx_mpi_link_line"
fi
if [[ ! -z "$lx_mpi_command_line" ]]; then
# Now extract the different parts of the MPI command line. Do these separately in case we need to
# parse them all out in future versions of this macro.
lx_mpi_defines=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-D\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
lx_mpi_includes=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-I\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
lx_mpi_link_paths=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-L\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
lx_mpi_libs=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-l\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
lx_mpi_link_args=` echo "$lx_mpi_command_line" | grep -o -- '\(^\| \)-Wl,\([[^\"[:space:]]]\+\|\"[[^\"[:space:]]]\+\"\)'`
# Create variables and clean up newlines and multiple spaces
MPI_$3FLAGS="$lx_mpi_defines $lx_mpi_includes"
MPI_$3LDFLAGS="$lx_mpi_link_paths $lx_mpi_libs $lx_mpi_link_args"
MPI_$3FLAGS=` echo "$MPI_$3FLAGS" | tr '\n' ' ' | sed 's/^[[ \t]]*//;s/[[ \t]]*$//' | sed 's/ +/ /g'`
MPI_$3LDFLAGS=`echo "$MPI_$3LDFLAGS" | tr '\n' ' ' | sed 's/^[[ \t]]*//;s/[[ \t]]*$//' | sed 's/ +/ /g'`
OLD_CPPFLAGS=$CPPFLAGS
OLD_LIBS=$LIBS
CPPFLAGS=$MPI_$3FLAGS
LIBS=$MPI_$3LDFLAGS
AC_TRY_LINK([#include <mpi.h>],
[int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);],
[# Add a define for testing at compile time.
AC_DEFINE([HAVE_MPI], [1], [Define to 1 if you have MPI libs and headers.])
have_$3_mpi='yes'],
[# zero out mpi flags so we don't link against the faulty library.
MPI_$3FLAGS=""
MPI_$3LDFLAGS=""
have_$3_mpi='no'])
# AC_SUBST everything.
AC_SUBST($1)
AC_SUBST(MPI_$3FLAGS)
AC_SUBST(MPI_$3LDFLAGS)
LIBS=$OLD_LIBS
CPPFLAGS=$OLD_CPPFLAGS
else
echo Unable to find suitable MPI Compiler. Try setting $1.
have_$3_mpi='no'
fi
])

View File

@ -1,38 +0,0 @@
FFTFLAGS=$(filter-out -std=c++11, $(CXXFLAGS) )
EIGENVER=3.2.8
EIGEN=eigen$(EIGENVER)
EIGENTAR=$(EIGEN).tar.bz2
EIGENURL=https://bitbucket.org/eigen/eigen/get/$(EIGENVER).tar.bz2
FFTWVER=3.3.4
FFTW=fftw-$(FFTWVER)
FFTWTAR=fftw-$(FFTWVER).tar.gz
FFTWURL=http://www.fftw.org/$(FFTWTAR)
all: Eigen FFTW headerlist
$(top_srcdir)/prerequisites/$(EIGENTAR):
curl -v $(EIGENURL) -o $(top_srcdir)/prerequisites/$(EIGENTAR)
$(top_srcdir)/prerequisites/$(FFTWTAR):
curl -v $(FFTWURL) -o $(top_srcdir)/prerequisites/$(FFTWTAR)
Eigen: $(top_srcdir)/prerequisites/$(EIGENTAR)
tar xvf $(top_srcdir)/prerequisites/$(EIGENTAR)
- rm -rf $(top_srcdir)/lib/Eigen
mv eigen-eigen*/Eigen .
echo EFILES=`find Eigen -type f -name '*.h' ` > $(top_srcdir)/lib/Eigen.inc
mv Eigen $(top_srcdir)/lib/
touch Eigen
FFTW: $(top_srcdir)/prerequisites/$(FFTWTAR)
tar xvf $(top_srcdir)/prerequisites/$(FFTWTAR)
cd $(FFTW) && ./configure --prefix=@abs_top_builddir@/prerequisites/fftwinstall CFLAGS="$(FFTFLAGS)" CC=$(CC) LDFLAGS="$(LDFLAGS)" && make all install
cp -pr fftwinstall/include/fftw3.h ../include/Grid/
cp -pr fftwinstall/lib/libfftw3.a ../lib/
touch FFTW
headerlist:
cd $(top_srcdir) && ./scripts/filelist
touch headerlist

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,4 +0,0 @@
HFILES=
CCFILES=

View File

@ -1,63 +1,47 @@
#!/bin/bash
#!/usr/bin/env bash
home=`pwd`
# library Make.inc
cd $home/lib
HFILES=`find . -type f -name '*.h' -not -path '*/Old/*'`
HFILES="$HFILES Config.h"
HFILES=`find . -type f -name '*.h' -not -path '*/Old/*' -not -path '*/Eigen/*'`
HFILES="$HFILES"
CCFILES=`find . -type f -name '*.cc' -not -name '*ommunicator*.cc'`
echo> Make.inc
echo HFILES=$HFILES >> Make.inc
echo HFILES=$HFILES > Make.inc
echo >> Make.inc
echo CCFILES=$CCFILES >> Make.inc
# tests Make.inc
cd $home/tests
dirs=`find . -type d `
for subdir in $dirs
do
pwd
echo subdir is $subdir of $dirs
cd $home/tests/$subdir
TESTS=`ls T*.cc`
TESTLIST=`echo ${TESTS} | sed s/.cc//g `
echo > Make.inc
echo bin_PROGRAMS += ${TESTLIST} | sed s/Test_zmm//g >> Make.inc
echo >> Make.inc
for f in $TESTS
do
BNAME=`basename $f .cc`
echo >> Make.inc
echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD=-lGrid>> Make.inc
echo >> Make.inc
done
for subdir in $dirs; do
cd $home/tests/$subdir
TESTS=`ls T*.cc`
TESTLIST=`echo ${TESTS} | sed s/.cc//g `
PREF=`[ $subdir = '.' ] && echo noinst || echo EXTRA`
echo "tests: ${TESTLIST}" > Make.inc
echo ${PREF}_PROGRAMS = ${TESTLIST} >> Make.inc
echo >> Make.inc
for f in $TESTS; do
BNAME=`basename $f .cc`
echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD=-lGrid>> Make.inc
echo >> Make.inc
done
done
# benchmarks Make.inc
cd $home/benchmarks
echo> Make.inc
TESTS=`ls B*.cc`
TESTLIST=`echo ${TESTS} | sed s/.cc//g `
echo > Make.inc
echo bin_PROGRAMS = ${TESTLIST} >> Make.inc
echo >> Make.inc
for f in $TESTS
do
BNAME=`basename $f .cc`
echo >> Make.inc
echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD=-lGrid>> Make.inc
echo bin_PROGRAMS = ${TESTLIST} > Make.inc
echo >> Make.inc
for f in $TESTS; do
BNAME=`basename $f .cc`
echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD=-lGrid>> Make.inc
echo >> Make.inc
done
cd ..

19
scripts/update_eigen.sh Executable file
View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
if (( $# != 1 )); then
echo "usage: `basename $0` <archive>" 1>&2
exit 1
fi
ARC=$1
INITDIR=`pwd`
rm -rf lib/Eigen
ARCDIR=`tar -tf ${ARC} | head -n1 | sed -e 's@/.*@@'`
tar -xf ${ARC}
cd ${ARCDIR}
(tar -cf - Eigen --exclude='*.txt' 2>/dev/null) | tar -xf - -C ../lib/
cd ../lib
echo 'eigen_files =\' > Eigen.inc
find Eigen -type f -print | sed 's/^/ /;$q;s/$/ \\/' >> Eigen.inc
cd ${INITDIR}
rm -rf ${ARCDIR}

18
scripts/update_fftw.sh Executable file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash
if (( $# != 1 )); then
echo "usage: `basename $0` <archive>" 1>&2
exit 1
fi
ARC=$1
INITDIR=`pwd`
rm -rf lib/fftw
mkdir lib/fftw
ARCDIR=`tar -tf ${ARC} | head -n1 | sed -e 's@/.*@@'`
tar -xf ${ARC}
cp ${ARCDIR}/api/fftw3.h lib/fftw/
cd ${INITDIR}
rm -rf ${ARCDIR}

View File

@ -1,11 +0,0 @@
bin_PROGRAMS += Test_nersc_io Test_serialisation
Test_nersc_io_SOURCES=Test_nersc_io.cc
Test_nersc_io_LDADD=-lGrid
Test_serialisation_SOURCES=Test_serialisation.cc
Test_serialisation_LDADD=-lGrid

View File

@ -1,19 +1 @@
# additional include paths necessary to compile the C++ library
bin_PROGRAMS =
SUBDIRS =
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
if USE_LAPACK
AM_CXXFLAGS += -DUSE_LAPACK
if USE_LAPACK_LIB
#if test "X${ac_LAPACK}X" != XyesX
AM_CXXFLAGS += -I$(ac_LAPACK)/include
AM_LDFLAGS += -L$(ac_LAPACK)/lib
#fi
endif
endif
include Make.inc

View File

@ -1,19 +0,0 @@
bin_PROGRAMS += Test_cshift Test_dwf_mixedcg_prec Test_simd Test_stencil
Test_cshift_SOURCES=Test_cshift.cc
Test_cshift_LDADD=-lGrid
Test_dwf_mixedcg_prec_SOURCES=Test_dwf_mixedcg_prec.cc
Test_dwf_mixedcg_prec_LDADD=-lGrid
Test_simd_SOURCES=Test_simd.cc
Test_simd_LDADD=-lGrid
Test_stencil_SOURCES=Test_stencil.cc
Test_stencil_LDADD=-lGrid

View File

@ -1,26 +1,7 @@
# additional include paths necessary to compile the C++ library
#SUBDIRS = core
# Uncomment to enable complete test suite build
SUBDIRS = core forces hmc solver debug
if BUILD_CHROMA_REGRESSION
SUBDIRS+= qdpxx
endif
bin_PROGRAMS =
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
if USE_LAPACK
AM_CXXFLAGS += -DUSE_LAPACK
if USE_LAPACK_LIB
#if test "X${ac_LAPACK}X" != XyesX
AM_CXXFLAGS += -I$(ac_LAPACK)/include
AM_LDFLAGS += -L$(ac_LAPACK)/lib
#fi
endif
endif
include Make.inc

View File

@ -1,83 +1,69 @@
bin_PROGRAMS += Test_cf_coarsen_support Test_checker Test_contfrac_even_odd Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_even_odd Test_dwf_rb5d Test_gamma Test_GaugeAction Test_gparity Test_gpwilson_even_odd Test_lie_generators Test_main Test_quenched_update Test_RectPlaq Test_rng Test_rng_fixed Test_wilson_even_odd Test_wilson_tm_even_odd
tests: Test_cf_coarsen_support Test_checker Test_contfrac_even_odd Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_even_odd Test_dwf_rb5d Test_fft Test_fftf Test_gamma Test_GaugeAction Test_gparity Test_gpwilson_even_odd Test_lie_generators Test_main Test_quenched_update Test_RectPlaq Test_rng Test_rng_fixed Test_wilson_even_odd Test_wilson_tm_even_odd
EXTRA_PROGRAMS = Test_cf_coarsen_support Test_checker Test_contfrac_even_odd Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_even_odd Test_dwf_rb5d Test_fft Test_fftf Test_gamma Test_GaugeAction Test_gparity Test_gpwilson_even_odd Test_lie_generators Test_main Test_quenched_update Test_RectPlaq Test_rng Test_rng_fixed Test_wilson_even_odd Test_wilson_tm_even_odd
Test_cf_coarsen_support_SOURCES=Test_cf_coarsen_support.cc
Test_cf_coarsen_support_LDADD=-lGrid
Test_checker_SOURCES=Test_checker.cc
Test_checker_LDADD=-lGrid
Test_contfrac_even_odd_SOURCES=Test_contfrac_even_odd.cc
Test_contfrac_even_odd_LDADD=-lGrid
Test_cshift_red_black_SOURCES=Test_cshift_red_black.cc
Test_cshift_red_black_LDADD=-lGrid
Test_cshift_red_black_rotate_SOURCES=Test_cshift_red_black_rotate.cc
Test_cshift_red_black_rotate_LDADD=-lGrid
Test_cshift_rotate_SOURCES=Test_cshift_rotate.cc
Test_cshift_rotate_LDADD=-lGrid
Test_dwf_even_odd_SOURCES=Test_dwf_even_odd.cc
Test_dwf_even_odd_LDADD=-lGrid
Test_dwf_rb5d_SOURCES=Test_dwf_rb5d.cc
Test_dwf_rb5d_LDADD=-lGrid
Test_fft_SOURCES=Test_fft.cc
Test_fft_LDADD=-lGrid
Test_fftf_SOURCES=Test_fftf.cc
Test_fftf_LDADD=-lGrid
Test_gamma_SOURCES=Test_gamma.cc
Test_gamma_LDADD=-lGrid
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
Test_GaugeAction_LDADD=-lGrid
Test_gparity_SOURCES=Test_gparity.cc
Test_gparity_LDADD=-lGrid
Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
Test_gpwilson_even_odd_LDADD=-lGrid
Test_lie_generators_SOURCES=Test_lie_generators.cc
Test_lie_generators_LDADD=-lGrid
Test_main_SOURCES=Test_main.cc
Test_main_LDADD=-lGrid
Test_quenched_update_SOURCES=Test_quenched_update.cc
Test_quenched_update_LDADD=-lGrid
Test_RectPlaq_SOURCES=Test_RectPlaq.cc
Test_RectPlaq_LDADD=-lGrid
Test_rng_SOURCES=Test_rng.cc
Test_rng_LDADD=-lGrid
Test_rng_fixed_SOURCES=Test_rng_fixed.cc
Test_rng_fixed_LDADD=-lGrid
Test_wilson_even_odd_SOURCES=Test_wilson_even_odd.cc
Test_wilson_even_odd_LDADD=-lGrid
Test_wilson_tm_even_odd_SOURCES=Test_wilson_tm_even_odd.cc
Test_wilson_tm_even_odd_LDADD=-lGrid

View File

@ -1,19 +1 @@
# additional include paths necessary to compile the C++ library
bin_PROGRAMS =
SUBDIRS =
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
if USE_LAPACK
AM_CXXFLAGS += -DUSE_LAPACK
if USE_LAPACK_LIB
#if test "X${ac_LAPACK}X" != XyesX
AM_CXXFLAGS += -I$(ac_LAPACK)/include
AM_LDFLAGS += -L$(ac_LAPACK)/lib
#fi
endif
endif
include Make.inc

111
tests/core/Test_fft.cc Normal file
View File

@ -0,0 +1,111 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cshift.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
std::vector<int> mpi_layout = GridDefaultMpi();
int vol = 1;
for(int d=0;d<latt_size.size();d++){
vol = vol * latt_size[d];
}
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
LatticeComplexD one(&Fine);
LatticeComplexD zz(&Fine);
LatticeComplexD C(&Fine);
LatticeComplexD Ctilde(&Fine);
LatticeComplexD coor(&Fine);
LatticeSpinMatrixD S(&Fine);
LatticeSpinMatrixD Stilde(&Fine);
std::vector<int> p({1,2,3,2});
one = ComplexD(1.0,0.0);
zz = ComplexD(0.0,0.0);
ComplexD ci(0.0,1.0);
C=zero;
for(int mu=0;mu<4;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
C = C - (TwoPiL * p[mu]) * coor;
}
C = exp(C*ci);
S=zero;
S = S+C;
FFT theFFT(&Fine);
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,3,FFT::forward); std::cout << theFFT.MFlops()<<std::endl;
// C=zero;
// Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
TComplexD cVol;
cVol()()() = vol;
C=zero;
pokeSite(cVol,C,p);
C=C-Ctilde;
std::cout << "diff scalar "<<norm2(C) << std::endl;
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
SpinMatrixD Sp;
Sp = zero; Sp = Sp+cVol;
S=zero;
pokeSite(Sp,S,p);
S= S-Stilde;
std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
Grid_finalize();
}

111
tests/core/Test_fftf.cc Normal file
View File

@ -0,0 +1,111 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cshift.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout( { vComplexF::Nsimd(),1,1,1});
std::vector<int> mpi_layout = GridDefaultMpi();
int vol = 1;
for(int d=0;d<latt_size.size();d++){
vol = vol * latt_size[d];
}
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
LatticeComplexF one(&Fine);
LatticeComplexF zz(&Fine);
LatticeComplexF C(&Fine);
LatticeComplexF Ctilde(&Fine);
LatticeComplexF coor(&Fine);
LatticeSpinMatrixF S(&Fine);
LatticeSpinMatrixF Stilde(&Fine);
std::vector<int> p({1,2,3,2});
one = ComplexF(1.0,0.0);
zz = ComplexF(0.0,0.0);
ComplexF ci(0.0,1.0);
C=zero;
for(int mu=0;mu<4;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
C = C - (TwoPiL * p[mu]) * coor;
}
C = exp(C*ci);
S=zero;
S = S+C;
FFT theFFT(&Fine);
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,3,FFT::forward); std::cout << theFFT.MFlops()<<std::endl;
// C=zero;
// Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
TComplexF cVol;
cVol()()() = vol;
C=zero;
pokeSite(cVol,C,p);
C=C-Ctilde;
std::cout << "diff scalar "<<norm2(C) << std::endl;
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
SpinMatrixF Sp;
Sp = zero; Sp = Sp+cVol;
S=zero;
pokeSite(Sp,S,p);
S= S-Stilde;
std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
Grid_finalize();
}

View File

@ -1,35 +0,0 @@
bin_PROGRAMS += Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_even_odd_vec Test_cayley_ldop_cr Test_cheby Test_synthetic_lanczos
Test_cayley_cg_SOURCES=Test_cayley_cg.cc
Test_cayley_cg_LDADD=-lGrid
Test_cayley_coarsen_support_SOURCES=Test_cayley_coarsen_support.cc
Test_cayley_coarsen_support_LDADD=-lGrid
Test_cayley_even_odd_SOURCES=Test_cayley_even_odd.cc
Test_cayley_even_odd_LDADD=-lGrid
Test_cayley_even_odd_vec_SOURCES=Test_cayley_even_odd_vec.cc
Test_cayley_even_odd_vec_LDADD=-lGrid
Test_cayley_ldop_cr_SOURCES=Test_cayley_ldop_cr.cc
Test_cayley_ldop_cr_LDADD=-lGrid
Test_cheby_SOURCES=Test_cheby.cc
Test_cheby_LDADD=-lGrid
Test_synthetic_lanczos_SOURCES=Test_synthetic_lanczos.cc
Test_synthetic_lanczos_LDADD=-lGrid
Test_zmm_SOURCES=Test_zmm.cc
Test_zmm_LDADD=-lGrid

View File

@ -1,19 +1 @@
# additional include paths necessary to compile the C++ library
bin_PROGRAMS =
SUBDIRS =
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
if USE_LAPACK
AM_CXXFLAGS += -DUSE_LAPACK
if USE_LAPACK_LIB
#if test "X${ac_LAPACK}X" != XyesX
AM_CXXFLAGS += -I$(ac_LAPACK)/include
AM_LDFLAGS += -L$(ac_LAPACK)/lib
#fi
endif
endif
include Make.inc

View File

@ -44,6 +44,7 @@ struct scal {
};
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallVecFermionR;
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusVecFermionR;
typedef MobiusFermion<DomainWallVec5dImplR> MobiusVecFermionR;
typedef MobiusZolotarevFermion<DomainWallVec5dImplR> MobiusZolotarevVecFermionR;
typedef ScaledShamirFermion<DomainWallVec5dImplR> ScaledShamirVecFermionR;
@ -117,6 +118,17 @@ int main (int argc, char ** argv)
TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
TestWhat<MobiusVecFermionR>(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::cout<<GridLogMessage <<"Z-MobiusFermion test"<<std::endl;
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
ZMobiusFermionR zDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c);
ZMobiusVecFermionR szDmob(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
TestMoo(zDmob,szDmob);
TestWhat<ZMobiusFermionR>(zDmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
TestWhat<ZMobiusVecFermionR>(szDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;

View File

@ -26,9 +26,15 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <PerfCount.h>
#include <Grid/PerfCount.h>
#ifdef TEST_ZMM
int main(int argc,char **argv)
{
return 0;
}
int main(int argc, char **argv) { return 0; }
#if 0
#include <simd/Intel512wilson.h>
using namespace Grid;
@ -478,5 +484,12 @@ void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3)
return;
}
#endif
#else
int main(int argc, char **argv)
{
std::cerr << "error: no ZMM test for the selected architecture" << std::endl;
return 1;
}
#endif

View File

@ -1,47 +1,36 @@
bin_PROGRAMS += Test_contfrac_force Test_dwf_force Test_dwf_gpforce Test_gpdwf_force Test_gp_rect_force Test_gpwilson_force Test_partfrac_force Test_rect_force Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
tests: Test_contfrac_force Test_dwf_force Test_dwf_gpforce Test_gpdwf_force Test_gp_rect_force Test_gpwilson_force Test_partfrac_force Test_rect_force Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
EXTRA_PROGRAMS = Test_contfrac_force Test_dwf_force Test_dwf_gpforce Test_gpdwf_force Test_gp_rect_force Test_gpwilson_force Test_partfrac_force Test_rect_force Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
Test_contfrac_force_SOURCES=Test_contfrac_force.cc
Test_contfrac_force_LDADD=-lGrid
Test_dwf_force_SOURCES=Test_dwf_force.cc
Test_dwf_force_LDADD=-lGrid
Test_dwf_gpforce_SOURCES=Test_dwf_gpforce.cc
Test_dwf_gpforce_LDADD=-lGrid
Test_gpdwf_force_SOURCES=Test_gpdwf_force.cc
Test_gpdwf_force_LDADD=-lGrid
Test_gp_rect_force_SOURCES=Test_gp_rect_force.cc
Test_gp_rect_force_LDADD=-lGrid
Test_gpwilson_force_SOURCES=Test_gpwilson_force.cc
Test_gpwilson_force_LDADD=-lGrid
Test_partfrac_force_SOURCES=Test_partfrac_force.cc
Test_partfrac_force_LDADD=-lGrid
Test_rect_force_SOURCES=Test_rect_force.cc
Test_rect_force_LDADD=-lGrid
Test_wilson_force_SOURCES=Test_wilson_force.cc
Test_wilson_force_LDADD=-lGrid
Test_wilson_force_phiMdagMphi_SOURCES=Test_wilson_force_phiMdagMphi.cc
Test_wilson_force_phiMdagMphi_LDADD=-lGrid
Test_wilson_force_phiMphi_SOURCES=Test_wilson_force_phiMphi.cc
Test_wilson_force_phiMphi_LDADD=-lGrid

View File

@ -1,19 +1 @@
# additional include paths necessary to compile the C++ library
bin_PROGRAMS =
SUBDIRS =
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
if USE_LAPACK
AM_CXXFLAGS += -DUSE_LAPACK
if USE_LAPACK_LIB
#if test "X${ac_LAPACK}X" != XyesX
AM_CXXFLAGS += -I$(ac_LAPACK)/include
AM_LDFLAGS += -L$(ac_LAPACK)/lib
#fi
endif
endif
include Make.inc

View File

@ -1,75 +1,57 @@
bin_PROGRAMS += Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonAdjointFermionGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_multishift_sqrt Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio
tests: Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonAdjointFermionGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_multishift_sqrt Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio
EXTRA_PROGRAMS = Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonAdjointFermionGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_multishift_sqrt Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio
Test_hmc_EODWFRatio_SOURCES=Test_hmc_EODWFRatio.cc
Test_hmc_EODWFRatio_LDADD=-lGrid
Test_hmc_EODWFRatio_Gparity_SOURCES=Test_hmc_EODWFRatio_Gparity.cc
Test_hmc_EODWFRatio_Gparity_LDADD=-lGrid
Test_hmc_EOWilsonFermionGauge_SOURCES=Test_hmc_EOWilsonFermionGauge.cc
Test_hmc_EOWilsonFermionGauge_LDADD=-lGrid
Test_hmc_EOWilsonRatio_SOURCES=Test_hmc_EOWilsonRatio.cc
Test_hmc_EOWilsonRatio_LDADD=-lGrid
Test_hmc_GparityIwasakiGauge_SOURCES=Test_hmc_GparityIwasakiGauge.cc
Test_hmc_GparityIwasakiGauge_LDADD=-lGrid
Test_hmc_GparityWilsonGauge_SOURCES=Test_hmc_GparityWilsonGauge.cc
Test_hmc_GparityWilsonGauge_LDADD=-lGrid
Test_hmc_IwasakiGauge_SOURCES=Test_hmc_IwasakiGauge.cc
Test_hmc_IwasakiGauge_LDADD=-lGrid
Test_hmc_RectGauge_SOURCES=Test_hmc_RectGauge.cc
Test_hmc_RectGauge_LDADD=-lGrid
Test_hmc_WilsonAdjointFermionGauge_SOURCES=Test_hmc_WilsonAdjointFermionGauge.cc
Test_hmc_WilsonAdjointFermionGauge_LDADD=-lGrid
Test_hmc_WilsonFermionGauge_SOURCES=Test_hmc_WilsonFermionGauge.cc
Test_hmc_WilsonFermionGauge_LDADD=-lGrid
Test_hmc_WilsonGauge_SOURCES=Test_hmc_WilsonGauge.cc
Test_hmc_WilsonGauge_LDADD=-lGrid
Test_hmc_WilsonRatio_SOURCES=Test_hmc_WilsonRatio.cc
Test_hmc_WilsonRatio_LDADD=-lGrid
Test_multishift_sqrt_SOURCES=Test_multishift_sqrt.cc
Test_multishift_sqrt_LDADD=-lGrid
Test_remez_SOURCES=Test_remez.cc
Test_remez_LDADD=-lGrid
Test_rhmc_EOWilson1p1_SOURCES=Test_rhmc_EOWilson1p1.cc
Test_rhmc_EOWilson1p1_LDADD=-lGrid
Test_rhmc_EOWilsonRatio_SOURCES=Test_rhmc_EOWilsonRatio.cc
Test_rhmc_EOWilsonRatio_LDADD=-lGrid
Test_rhmc_Wilson1p1_SOURCES=Test_rhmc_Wilson1p1.cc
Test_rhmc_Wilson1p1_LDADD=-lGrid
Test_rhmc_WilsonRatio_SOURCES=Test_rhmc_WilsonRatio.cc
Test_rhmc_WilsonRatio_LDADD=-lGrid

View File

@ -1,19 +1 @@
# additional include paths necessary to compile the C++ library
bin_PROGRAMS =
SUBDIRS =
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
if USE_LAPACK
AM_CXXFLAGS += -DUSE_LAPACK
if USE_LAPACK_LIB
#if test "X${ac_LAPACK}X" != XyesX
AM_CXXFLAGS += -I$(ac_LAPACK)/include
AM_LDFLAGS += -L$(ac_LAPACK)/lib
#fi
endif
endif
include Make.inc

View File

@ -1,11 +0,0 @@
bin_PROGRAMS += Test_qdpxx_loops_staples Test_qdpxx_munprec
Test_qdpxx_loops_staples_SOURCES=Test_qdpxx_loops_staples.cc
Test_qdpxx_loops_staples_LDADD=-lGrid
Test_qdpxx_munprec_SOURCES=Test_qdpxx_munprec.cc
Test_qdpxx_munprec_LDADD=-lGrid

View File

@ -1,6 +1,4 @@
# additional include paths necessary to compile the C++ library
AM_CXXFLAGS += `chroma-config --cxxflags`
AM_LDFLAGS += `chroma-config --ldflags` `chroma-config --libs`
AM_CXXFLAGS = -I$(top_srcdir)/include `chroma-config --cxxflags`
AM_LDFLAGS = -L$(top_builddir)/lib `chroma-config --ldflags` `chroma-config --libs`
bin_PROGRAMS=
include Make.inc

View File

@ -1,55 +0,0 @@
bin_PROGRAMS += Test_cf_cr_unprec Test_contfrac_cg Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_fpgcr Test_dwf_hdcr Test_dwf_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec
Test_cf_cr_unprec_SOURCES=Test_cf_cr_unprec.cc
Test_cf_cr_unprec_LDADD=-lGrid
Test_contfrac_cg_SOURCES=Test_contfrac_cg.cc
Test_contfrac_cg_LDADD=-lGrid
Test_dwf_cg_prec_SOURCES=Test_dwf_cg_prec.cc
Test_dwf_cg_prec_LDADD=-lGrid
Test_dwf_cg_schur_SOURCES=Test_dwf_cg_schur.cc
Test_dwf_cg_schur_LDADD=-lGrid
Test_dwf_cg_unprec_SOURCES=Test_dwf_cg_unprec.cc
Test_dwf_cg_unprec_LDADD=-lGrid
Test_dwf_cr_unprec_SOURCES=Test_dwf_cr_unprec.cc
Test_dwf_cr_unprec_LDADD=-lGrid
Test_dwf_fpgcr_SOURCES=Test_dwf_fpgcr.cc
Test_dwf_fpgcr_LDADD=-lGrid
Test_dwf_hdcr_SOURCES=Test_dwf_hdcr.cc
Test_dwf_hdcr_LDADD=-lGrid
Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
Test_dwf_lanczos_LDADD=-lGrid
Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc
Test_wilson_cg_prec_LDADD=-lGrid
Test_wilson_cg_schur_SOURCES=Test_wilson_cg_schur.cc
Test_wilson_cg_schur_LDADD=-lGrid
Test_wilson_cg_unprec_SOURCES=Test_wilson_cg_unprec.cc
Test_wilson_cg_unprec_LDADD=-lGrid
Test_wilson_cr_unprec_SOURCES=Test_wilson_cr_unprec.cc
Test_wilson_cr_unprec_LDADD=-lGrid

View File

@ -1,19 +1 @@
# additional include paths necessary to compile the C++ library
bin_PROGRAMS =
SUBDIRS =
AM_CXXFLAGS = -I$(top_srcdir)/include
AM_LDFLAGS = -L$(top_builddir)/lib
if USE_LAPACK
AM_CXXFLAGS += -DUSE_LAPACK
if USE_LAPACK_LIB
#if test "X${ac_LAPACK}X" != XyesX
AM_CXXFLAGS += -I$(ac_LAPACK)/include
AM_LDFLAGS += -L$(ac_LAPACK)/lib
#fi
endif
endif
include Make.inc