mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'develop' into feature/hadrons
This commit is contained in:
commit
a034e9901b
4
.gitignore
vendored
4
.gitignore
vendored
@ -94,6 +94,10 @@ build.sh
|
|||||||
################
|
################
|
||||||
lib/Eigen/*
|
lib/Eigen/*
|
||||||
|
|
||||||
|
# FFTW source #
|
||||||
|
################
|
||||||
|
lib/fftw/*
|
||||||
|
|
||||||
# libtool macros #
|
# libtool macros #
|
||||||
##################
|
##################
|
||||||
m4/lt*
|
m4/lt*
|
||||||
|
15
.travis.yml
15
.travis.yml
@ -23,6 +23,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: VERSION=-4.9
|
env: VERSION=-4.9
|
||||||
- compiler: gcc
|
- compiler: gcc
|
||||||
@ -35,6 +37,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: VERSION=-5
|
env: VERSION=-5
|
||||||
- compiler: clang
|
- compiler: clang
|
||||||
@ -47,6 +51,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||||
- compiler: clang
|
- compiler: clang
|
||||||
@ -59,6 +65,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||||
|
|
||||||
@ -69,6 +77,7 @@ before_install:
|
|||||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
||||||
|
|
||||||
install:
|
install:
|
||||||
@ -92,3 +101,9 @@ script:
|
|||||||
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||||
- make -j4
|
- make -j4
|
||||||
- ./benchmarks/Benchmark_dwf --threads 1
|
- ./benchmarks/Benchmark_dwf --threads 1
|
||||||
|
- echo make clean
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
|
||||||
|
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
|
||||||
|
- make -j4
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||||
|
12
README.md
12
README.md
@ -68,10 +68,18 @@ Now you can execute the `configure` script to generate makefiles (here from a bu
|
|||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
mkdir build; cd build
|
mkdir build; cd build
|
||||||
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi --prefix=<path>
|
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||||
```
|
```
|
||||||
|
|
||||||
where `--enable-precision=` set the default precision (`single` or `double`), `--enable-simd=` set the SIMD type (see possible values below), `--enable-comms=` set the protocol used for communications (`none`, `mpi` or `shmem`), and `<path>` should be replaced by the prefix path where you want to install Grid. Other options are available, use `configure --help` to display them. Like with any other program using GNU autotool, the `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to customise the build.
|
where `--enable-precision=` sets the default precision (`single` or `double`),
|
||||||
|
`--enable-simd=` sets the SIMD type (see possible values below),
|
||||||
|
`--enable-comms=` sets the protocol used for communications (`none`, `mpi`, `mpi-auto` or
|
||||||
|
`shmem`), and `<path>` should be replaced by the prefix path where you want to
|
||||||
|
install Grid. The `mpi-auto` communication option sets `configure` to determine
|
||||||
|
automatically how to link to MPI. Other options are available, use `configure
|
||||||
|
--help` to display them. As with any other program using the GNU autotools, the
|
||||||
|
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
|
||||||
|
customise the build.
|
||||||
|
|
||||||
Finally, you can build and install Grid:
|
Finally, you can build and install Grid:
|
||||||
|
|
||||||
|
@ -194,7 +194,7 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
@ -315,7 +315,7 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -61,6 +61,8 @@ int main (int argc, char ** argv)
|
|||||||
QCD::WilsonKernelsStatic::AsmOpt=0;
|
QCD::WilsonKernelsStatic::AsmOpt=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
||||||
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
117
benchmarks/Benchmark_wilson_sweep.cc
Normal file
117
benchmarks/Benchmark_wilson_sweep.cc
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
Source file: ./benchmarks/Benchmark_wilson_sweep.cc
|
||||||
|
Copyright (C) 2015
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Richard Rollins <rprollins@users.noreply.github.com>
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
/// Thin wrapper that stores a single value of type `d` in its public
/// `internal` member.
template <typename d>
struct scal {
  d internal;
};
|
||||||
|
|
||||||
|
// Table of the four gamma matrices, listed in the order X, Y, Z, T.
Gamma::GammaMatrix Gmu [] = { Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ, Gamma::GammaT };
|
||||||
|
|
||||||
|
bool overlapComms = false;
|
||||||
|
|
||||||
|
void bench_wilson (
|
||||||
|
LatticeFermion & src,
|
||||||
|
LatticeFermion & result,
|
||||||
|
WilsonFermionR & Dw,
|
||||||
|
double const volume,
|
||||||
|
int const dag );
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
|
||||||
|
typename WilsonFermionR::ImplParams params;
|
||||||
|
params.overlapCommsCompute = overlapComms;
|
||||||
|
|
||||||
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
RealD mass = 0.1;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
|
||||||
|
int Lmax = 32;
|
||||||
|
int dmin = 0;
|
||||||
|
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
||||||
|
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
||||||
|
for (int L=8; L<=Lmax; L*=2)
|
||||||
|
{
|
||||||
|
std::vector<int> latt_size = std::vector<int>(4,L);
|
||||||
|
for(int d=4; d>dmin; d--)
|
||||||
|
{
|
||||||
|
if ( d<=3 ) { latt_size[d] *= 2; }
|
||||||
|
|
||||||
|
std::cout << GridLogMessage;
|
||||||
|
std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) );
|
||||||
|
std::cout << latt_size.back() << "\t\t";
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||||
|
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||||
|
LatticeFermion src(&Grid); random(pRNG,src);
|
||||||
|
LatticeFermion result(&Grid); result=zero;
|
||||||
|
|
||||||
|
double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
|
||||||
|
|
||||||
|
WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
|
||||||
|
|
||||||
|
bench_wilson(src,result,Dw,volume,DaggerNo);
|
||||||
|
bench_wilson(src,result,Dw,volume,DaggerYes);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Times 1000 applications of Dw.Dhop(src,result,dag) and prints the rate.
///
/// The printed figure is 1344 * volume * 1000 divided by the elapsed
/// usecond() interval, followed by two tabs and no newline, so that the
/// caller can place several results on one line.
void bench_wilson (LatticeFermion & src,
                   LatticeFermion & result,
                   WilsonFermionR & Dw,
                   double const     volume,
                   int const        dag)
{
  const int repetitions = 1000;

  const double start = usecond();
  int done = 0;
  while (done < repetitions) {
    Dw.Dhop(src,result,dag);
    ++done;
  }
  const double stop = usecond();

  const double total_flops = 1344 * volume * repetitions;
  std::cout << total_flops/(stop-start) << "\t\t";
}
|
@ -40,14 +40,20 @@ int main(int argc,char **argv)
|
|||||||
std::ofstream os("zmm.dat");
|
std::ofstream os("zmm.dat");
|
||||||
|
|
||||||
os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
|
os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking ZMM"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Volume \t\t\t\tC++DW/MFLOPs\tASM-DW/MFLOPs\tdiff"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
|
||||||
for(int L=4;L<=32;L+=4){
|
for(int L=4;L<=32;L+=4){
|
||||||
for(int m=1;m<=2;m++){
|
for(int m=1;m<=2;m++){
|
||||||
for(int Ls=8;Ls<=16;Ls+=8){
|
for(int Ls=8;Ls<=16;Ls+=8){
|
||||||
std::vector<int> grid({L,L,m*L,m*L});
|
std::vector<int> grid({L,L,m*L,m*L});
|
||||||
|
std::cout << GridLogMessage <<"\t";
|
||||||
for(int i=0;i<4;i++) {
|
for(int i=0;i<4;i++) {
|
||||||
std::cout << grid[i]<<"x";
|
std::cout << grid[i]<<"x";
|
||||||
}
|
}
|
||||||
std::cout << Ls<<std::endl;
|
std::cout << Ls<<"\t\t";
|
||||||
bench(os,grid,Ls);
|
bench(os,grid,Ls);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -104,7 +110,6 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
|
||||||
int ncall=50;
|
int ncall=50;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
@ -116,7 +121,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|||||||
double flops=1344*volume/2;
|
double flops=1344*volume/2;
|
||||||
|
|
||||||
mfc = flops*ncall/(t1-t0);
|
mfc = flops*ncall/(t1-t0);
|
||||||
std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s = "<< mfc<<std::endl;
|
std::cout<<mfc<<"\t\t";
|
||||||
|
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=1;
|
QCD::WilsonKernelsStatic::AsmOpt=1;
|
||||||
t0=usecond();
|
t0=usecond();
|
||||||
@ -125,7 +130,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|||||||
}
|
}
|
||||||
t1=usecond();
|
t1=usecond();
|
||||||
mfa = flops*ncall/(t1-t0);
|
mfa = flops*ncall/(t1-t0);
|
||||||
std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s = "<< mfa<<std::endl;
|
std::cout<<mfa<<"\t\t";
|
||||||
/*
|
/*
|
||||||
int dag=DaggerNo;
|
int dag=DaggerNo;
|
||||||
t0=usecond();
|
t0=usecond();
|
||||||
@ -163,8 +168,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|||||||
//resulta = (-0.5) * resulta;
|
//resulta = (-0.5) * resulta;
|
||||||
|
|
||||||
diff = resulto-resulta;
|
diff = resulto-resulta;
|
||||||
std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
|
std::cout<<norm2(diff)<<std::endl;
|
||||||
std::cout<<std::endl;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,11 +1,18 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
|
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
|
||||||
|
FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz
|
||||||
|
|
||||||
echo "-- deploying Eigen source..."
|
echo "-- deploying Eigen source..."
|
||||||
wget ${EIGEN_URL}
|
wget ${EIGEN_URL} --no-check-certificate
|
||||||
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
|
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
|
||||||
rm `basename ${EIGEN_URL}`
|
rm `basename ${EIGEN_URL}`
|
||||||
|
|
||||||
|
echo "-- copying fftw prototypes..."
|
||||||
|
wget ${FFTW_URL}
|
||||||
|
./scripts/update_fftw.sh `basename ${FFTW_URL}`
|
||||||
|
rm `basename ${FFTW_URL}`
|
||||||
|
|
||||||
echo '-- generating Make.inc files...'
|
echo '-- generating Make.inc files...'
|
||||||
./scripts/filelist
|
./scripts/filelist
|
||||||
echo '-- generating configure script...'
|
echo '-- generating configure script...'
|
||||||
|
48
configure.ac
48
configure.ac
@ -8,11 +8,20 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
|||||||
|
|
||||||
############### Checks for programs
|
############### Checks for programs
|
||||||
AC_LANG(C++)
|
AC_LANG(C++)
|
||||||
: ${CXXFLAGS="-O3"}
|
CXXFLAGS="-O3 $CXXFLAGS"
|
||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
|
AC_PROG_RANLIB
|
||||||
|
|
||||||
|
############ openmp ###############
|
||||||
AC_OPENMP
|
AC_OPENMP
|
||||||
|
|
||||||
|
ac_openmp=no
|
||||||
|
|
||||||
|
if test "${OPENMP_CXXFLAGS}X" != "X"; then
|
||||||
|
ac_openmp=yes
|
||||||
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
|
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
|
||||||
LT_INIT([disable-shared])
|
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
|
||||||
|
fi
|
||||||
|
|
||||||
############### Checks for header files
|
############### Checks for header files
|
||||||
AC_CHECK_HEADERS(stdint.h)
|
AC_CHECK_HEADERS(stdint.h)
|
||||||
@ -29,7 +38,7 @@ AC_TYPE_SIZE_T
|
|||||||
AC_TYPE_UINT32_T
|
AC_TYPE_UINT32_T
|
||||||
AC_TYPE_UINT64_T
|
AC_TYPE_UINT64_T
|
||||||
|
|
||||||
############### Options
|
############### GMP and MPFR #################
|
||||||
AC_ARG_WITH([gmp],
|
AC_ARG_WITH([gmp],
|
||||||
[AS_HELP_STRING([--with-gmp=prefix],
|
[AS_HELP_STRING([--with-gmp=prefix],
|
||||||
[try this for a non-standard install prefix of the GMP library])],
|
[try this for a non-standard install prefix of the GMP library])],
|
||||||
@ -40,9 +49,12 @@ AC_ARG_WITH([mpfr],
|
|||||||
[try this for a non-standard install prefix of the MPFR library])],
|
[try this for a non-standard install prefix of the MPFR library])],
|
||||||
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
|
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
|
||||||
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
|
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
|
################## lapack ####################
|
||||||
AC_ARG_ENABLE([lapack],
|
AC_ARG_ENABLE([lapack],
|
||||||
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
||||||
[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
|
[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
|
||||||
|
|
||||||
case ${ac_LAPACK} in
|
case ${ac_LAPACK} in
|
||||||
no)
|
no)
|
||||||
;;
|
;;
|
||||||
@ -54,6 +66,13 @@ case ${ac_LAPACK} in
|
|||||||
AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
################## FFTW3 ####################
|
||||||
|
AC_ARG_WITH([fftw],
|
||||||
|
[AS_HELP_STRING([--with-fftw=prefix],
|
||||||
|
[try this for a non-standard install prefix of the FFTW3 library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
################ Get compiler informations
|
################ Get compiler informations
|
||||||
AC_LANG([C++])
|
AC_LANG([C++])
|
||||||
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
|
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
|
||||||
@ -67,7 +86,6 @@ AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
|
|||||||
############### Checks for library functions
|
############### Checks for library functions
|
||||||
CXXFLAGS_CPY=$CXXFLAGS
|
CXXFLAGS_CPY=$CXXFLAGS
|
||||||
LDFLAGS_CPY=$LDFLAGS
|
LDFLAGS_CPY=$LDFLAGS
|
||||||
LIBS_CPY=$LIBS
|
|
||||||
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
@ -77,7 +95,7 @@ AC_CHECK_LIB([gmp],[__gmpf_init],
|
|||||||
[have_mpfr=true]
|
[have_mpfr=true]
|
||||||
[LIBS="$LIBS -lmpfr"],
|
[LIBS="$LIBS -lmpfr"],
|
||||||
[AC_MSG_ERROR([MPFR library not found])])]
|
[AC_MSG_ERROR([MPFR library not found])])]
|
||||||
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
|
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
|
||||||
[have_gmp=true]
|
[have_gmp=true]
|
||||||
[LIBS="$LIBS -lgmp"],
|
[LIBS="$LIBS -lgmp"],
|
||||||
[AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
|
[AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
|
||||||
@ -86,6 +104,11 @@ if test "${ac_LAPACK}x" != "nox"; then
|
|||||||
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
|
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
|
||||||
[AC_MSG_ERROR("LAPACK enabled but library not found")])
|
[AC_MSG_ERROR("LAPACK enabled but library not found")])
|
||||||
fi
|
fi
|
||||||
|
AC_CHECK_LIB([fftw3],[fftw_execute],
|
||||||
|
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])]
|
||||||
|
[have_fftw=true]
|
||||||
|
[LIBS="$LIBS -lfftw3 -lfftw3f"],
|
||||||
|
[AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])])
|
||||||
CXXFLAGS=$CXXFLAGS_CPY
|
CXXFLAGS=$CXXFLAGS_CPY
|
||||||
LDFLAGS=$LDFLAGS_CPY
|
LDFLAGS=$LDFLAGS_CPY
|
||||||
|
|
||||||
@ -108,16 +131,19 @@ case ${ax_cv_cxx_compiler_vendor} in
|
|||||||
SIMD_FLAGS='-mavx -mfma4';;
|
SIMD_FLAGS='-mavx -mfma4';;
|
||||||
AVX2)
|
AVX2)
|
||||||
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||||
SIMD_FLAGS='-mavx2';;
|
SIMD_FLAGS='-mavx2 -mfma';;
|
||||||
AVX512|AVX512MIC|KNL)
|
AVX512|AVX512MIC|KNL)
|
||||||
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
|
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
|
||||||
IMCI|KNC)
|
IMCI|KNC)
|
||||||
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
|
AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
|
||||||
SIMD_FLAGS='';;
|
SIMD_FLAGS='';;
|
||||||
GEN)
|
GEN)
|
||||||
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
SIMD_FLAGS='';;
|
SIMD_FLAGS='';;
|
||||||
|
QPX|BGQ)
|
||||||
|
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
|
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
|
||||||
esac;;
|
esac;;
|
||||||
@ -294,15 +320,17 @@ Summary of configuration for $PACKAGE v$VERSION
|
|||||||
- compiler version : ${ax_cv_gxx_version}
|
- compiler version : ${ax_cv_gxx_version}
|
||||||
----- BUILD OPTIONS -----------------------------------
|
----- BUILD OPTIONS -----------------------------------
|
||||||
- SIMD : ${ac_SIMD}
|
- SIMD : ${ac_SIMD}
|
||||||
- communications type : ${ac_COMMS}
|
- Threading : ${ac_openmp}
|
||||||
- default precision : ${ac_PRECISION}
|
- Communications type : ${ac_COMMS}
|
||||||
|
- Default precision : ${ac_PRECISION}
|
||||||
- RNG choice : ${ac_RNG}
|
- RNG choice : ${ac_RNG}
|
||||||
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
||||||
- LAPACK : ${ac_LAPACK}
|
- LAPACK : ${ac_LAPACK}
|
||||||
|
- FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
||||||
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
||||||
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
||||||
----- BUILD FLAGS -------------------------------------
|
----- BUILD FLAGS -------------------------------------
|
||||||
- CXXFLAGS:
|
- CXXFLAGS:
|
||||||
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`
|
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`
|
||||||
- LDFLAGS:
|
- LDFLAGS:
|
||||||
`echo ${AM_LDFLAGS} ${LDFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`
|
`echo ${AM_LDFLAGS} ${LDFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`
|
||||||
|
276
lib/FFT.h
Normal file
276
lib/FFT.h
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
|
||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/FFT.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef _GRID_FFT_H_
|
||||||
|
#define _GRID_FFT_H_
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
#include <fftw3.h>
|
||||||
|
#endif
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
/// Primary template: intentionally empty. Only the explicit specialisations
/// provide the FFTW typedefs and forwarding functions.
template <typename scalar>
struct FFTW {};
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
template<> struct FFTW<ComplexD> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef fftw_complex FFTW_scalar;
|
||||||
|
typedef fftw_plan FFTW_plan;
|
||||||
|
|
||||||
|
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||||
|
FFTW_scalar *in, const int *inembed,
|
||||||
|
int istride, int idist,
|
||||||
|
FFTW_scalar *out, const int *onembed,
|
||||||
|
int ostride, int odist,
|
||||||
|
int sign, unsigned flags) {
|
||||||
|
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||||
|
::fftw_flops(p,add,mul,fmas);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||||
|
::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||||
|
::fftw_destroy_plan(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct FFTW<ComplexF> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef fftwf_complex FFTW_scalar;
|
||||||
|
typedef fftwf_plan FFTW_plan;
|
||||||
|
|
||||||
|
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||||
|
FFTW_scalar *in, const int *inembed,
|
||||||
|
int istride, int idist,
|
||||||
|
FFTW_scalar *out, const int *onembed,
|
||||||
|
int ostride, int odist,
|
||||||
|
int sign, unsigned flags) {
|
||||||
|
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||||
|
::fftwf_flops(p,add,mul,fmas);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||||
|
::fftwf_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||||
|
::fftwf_destroy_plan(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Fallback values for the transform-direction constants, defined only when
// FFTW_FORWARD is not already available at this point.
#if !defined(FFTW_FORWARD)
#  define FFTW_FORWARD  (-1)
#  define FFTW_BACKWARD (+1)
#endif
|
||||||
|
|
||||||
|
class FFT {
|
||||||
|
private:
|
||||||
|
|
||||||
|
GridCartesian *vgrid;
|
||||||
|
GridCartesian *sgrid;
|
||||||
|
|
||||||
|
int Nd;
|
||||||
|
double flops;
|
||||||
|
double flops_call;
|
||||||
|
uint64_t usec;
|
||||||
|
|
||||||
|
std::vector<int> dimensions;
|
||||||
|
std::vector<int> processors;
|
||||||
|
std::vector<int> processor_coor;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
static const int forward=FFTW_FORWARD;
|
||||||
|
static const int backward=FFTW_BACKWARD;
|
||||||
|
|
||||||
|
double Flops(void) {return flops;}
|
||||||
|
double MFlops(void) {return flops/usec;}
|
||||||
|
|
||||||
|
FFT ( GridCartesian * grid ) :
|
||||||
|
vgrid(grid),
|
||||||
|
Nd(grid->_ndimension),
|
||||||
|
dimensions(grid->_fdimensions),
|
||||||
|
processors(grid->_processors),
|
||||||
|
processor_coor(grid->_processor_coor)
|
||||||
|
{
|
||||||
|
flops=0;
|
||||||
|
usec =0;
|
||||||
|
std::vector<int> layout(Nd,1);
|
||||||
|
sgrid = new GridCartesian(dimensions,layout,processors);
|
||||||
|
};
|
||||||
|
|
||||||
|
~FFT ( void) {
|
||||||
|
delete sgrid;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){
|
||||||
|
|
||||||
|
conformable(result._grid,vgrid);
|
||||||
|
conformable(source._grid,vgrid);
|
||||||
|
|
||||||
|
int L = vgrid->_ldimensions[dim];
|
||||||
|
int G = vgrid->_fdimensions[dim];
|
||||||
|
|
||||||
|
std::vector<int> layout(Nd,1);
|
||||||
|
std::vector<int> pencil_gd(vgrid->_fdimensions);
|
||||||
|
|
||||||
|
pencil_gd[dim] = G*processors[dim];
|
||||||
|
|
||||||
|
// Pencil global vol LxLxGxLxL per node
|
||||||
|
GridCartesian pencil_g(pencil_gd,layout,processors);
|
||||||
|
|
||||||
|
// Construct pencils
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename sobj::scalar_type scalar;
|
||||||
|
|
||||||
|
Lattice<vobj> ssource(vgrid); ssource =source;
|
||||||
|
Lattice<sobj> pgsource(&pencil_g);
|
||||||
|
Lattice<sobj> pgresult(&pencil_g); pgresult=zero;
|
||||||
|
|
||||||
|
#ifndef HAVE_FFTW
|
||||||
|
assert(0);
|
||||||
|
#else
|
||||||
|
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
|
||||||
|
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
|
||||||
|
|
||||||
|
{
|
||||||
|
int Ncomp = sizeof(sobj)/sizeof(scalar);
|
||||||
|
int Nlow = 1;
|
||||||
|
for(int d=0;d<dim;d++){
|
||||||
|
Nlow*=vgrid->_ldimensions[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
int rank = 1; /* 1d transforms */
|
||||||
|
int n[] = {G}; /* 1d transforms of length G */
|
||||||
|
int howmany = Ncomp;
|
||||||
|
int odist,idist,istride,ostride;
|
||||||
|
idist = odist = 1; /* Distance between consecutive FT's */
|
||||||
|
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
|
||||||
|
int *inembed = n, *onembed = n;
|
||||||
|
|
||||||
|
|
||||||
|
int sign = FFTW_FORWARD;
|
||||||
|
if (inverse) sign = FFTW_BACKWARD;
|
||||||
|
|
||||||
|
FFTW_plan p;
|
||||||
|
{
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0];
|
||||||
|
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
|
||||||
|
in,inembed,
|
||||||
|
istride,idist,
|
||||||
|
out,onembed,
|
||||||
|
ostride, odist,
|
||||||
|
sign,FFTW_ESTIMATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
double add,mul,fma;
|
||||||
|
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
|
||||||
|
flops_call = add+mul+2.0*fma;
|
||||||
|
|
||||||
|
GridStopWatch timer;
|
||||||
|
|
||||||
|
// Barrel shift and collect global pencil
|
||||||
|
for(int p=0;p<processors[dim];p++) {
|
||||||
|
|
||||||
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
|
||||||
|
std::vector<int> lcoor(Nd);
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
peekLocalSite(s,ssource,lcoor);
|
||||||
|
|
||||||
|
lcoor[dim]+=p*L;
|
||||||
|
|
||||||
|
pokeLocalSite(s,pgsource,lcoor);
|
||||||
|
}
|
||||||
|
|
||||||
|
ssource = Cshift(ssource,dim,L);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop over orthog coords
|
||||||
|
int NN=pencil_g.lSites();
|
||||||
|
|
||||||
|
GridStopWatch Timer;
|
||||||
|
Timer.Start();
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int idx=0;idx<NN;idx++) {
|
||||||
|
|
||||||
|
std::vector<int> lcoor(Nd);
|
||||||
|
pencil_g.LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
|
||||||
|
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx];
|
||||||
|
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Timer.Stop();
|
||||||
|
usec += Timer.useconds();
|
||||||
|
flops+= flops_call*NN;
|
||||||
|
|
||||||
|
int pc = processor_coor[dim];
|
||||||
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
std::vector<int> lcoor(Nd);
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
std::vector<int> gcoor = lcoor;
|
||||||
|
// extract the result
|
||||||
|
sobj s;
|
||||||
|
gcoor[dim] = lcoor[dim]+L*pc;
|
||||||
|
peekLocalSite(s,pgresult,gcoor);
|
||||||
|
pokeLocalSite(s,result,lcoor);
|
||||||
|
}
|
||||||
|
|
||||||
|
FFTW<scalar>::fftw_destroy_plan(p);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -68,6 +68,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <Grid/Simd.h>
|
#include <Grid/Simd.h>
|
||||||
#include <Grid/Threads.h>
|
#include <Grid/Threads.h>
|
||||||
#include <Grid/Lexicographic.h>
|
#include <Grid/Lexicographic.h>
|
||||||
|
#include <Grid/Init.h>
|
||||||
#include <Grid/Communicator.h>
|
#include <Grid/Communicator.h>
|
||||||
#include <Grid/Cartesian.h>
|
#include <Grid/Cartesian.h>
|
||||||
#include <Grid/Tensors.h>
|
#include <Grid/Tensors.h>
|
||||||
@ -78,7 +79,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <Grid/parallelIO/BinaryIO.h>
|
#include <Grid/parallelIO/BinaryIO.h>
|
||||||
#include <Grid/qcd/QCD.h>
|
#include <Grid/qcd/QCD.h>
|
||||||
#include <Grid/parallelIO/NerscIO.h>
|
#include <Grid/parallelIO/NerscIO.h>
|
||||||
#include <Grid/Init.h>
|
|
||||||
|
#include <Grid/FFT.h>
|
||||||
|
|
||||||
#include <Grid/qcd/hmc/NerscCheckpointer.h>
|
#include <Grid/qcd/hmc/NerscCheckpointer.h>
|
||||||
#include <Grid/qcd/hmc/HmcRunner.h>
|
#include <Grid/qcd/hmc/HmcRunner.h>
|
||||||
|
@ -153,6 +153,7 @@ void GridParseLayout(char **argv,int argc,
|
|||||||
assert(ompthreads.size()==1);
|
assert(ompthreads.size()==1);
|
||||||
GridThread::SetThreads(ompthreads[0]);
|
GridThread::SetThreads(ompthreads[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
||||||
std::vector<int> cores(0);
|
std::vector<int> cores(0);
|
||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
||||||
@ -203,7 +204,6 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
GridLogConfigure(logstreams);
|
GridLogConfigure(logstreams);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||||
Grid_debug_handler_init();
|
Grid_debug_handler_init();
|
||||||
}
|
}
|
||||||
|
@ -17,8 +17,8 @@ endif
|
|||||||
include Make.inc
|
include Make.inc
|
||||||
include Eigen.inc
|
include Eigen.inc
|
||||||
|
|
||||||
lib_LTLIBRARIES = libGrid.la
|
lib_LIBRARIES = libGrid.a
|
||||||
|
|
||||||
libGrid_la_SOURCES = $(CCFILES) $(extra_sources)
|
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
|
||||||
libGrid_ladir = $(pkgincludedir)
|
libGrid_adir = $(pkgincludedir)
|
||||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
||||||
|
@ -265,7 +265,7 @@
|
|||||||
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
|
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
|
||||||
}
|
}
|
||||||
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
|
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
|
||||||
_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
|
//_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
|
||||||
local = _entries[ent]._is_local;
|
local = _entries[ent]._is_local;
|
||||||
perm = _entries[ent]._permute;
|
perm = _entries[ent]._permute;
|
||||||
if (perm) ptype = _permute_type[point];
|
if (perm) ptype = _permute_type[point];
|
||||||
|
@ -127,21 +127,12 @@ class CartesianCommunicator {
|
|||||||
int recv_from_rank,
|
int recv_from_rank,
|
||||||
int bytes);
|
int bytes);
|
||||||
|
|
||||||
void SendToRecvFromInit(std::vector<CommsRequest_t> &list,
|
|
||||||
void *xmit,
|
|
||||||
int xmit_to_rank,
|
|
||||||
void *recv,
|
|
||||||
int recv_from_rank,
|
|
||||||
int bytes);
|
|
||||||
|
|
||||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
void *xmit,
|
void *xmit,
|
||||||
int xmit_to_rank,
|
int xmit_to_rank,
|
||||||
void *recv,
|
void *recv,
|
||||||
int recv_from_rank,
|
int recv_from_rank,
|
||||||
int bytes);
|
int bytes);
|
||||||
|
|
||||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list);
|
|
||||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
|
@ -144,28 +144,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Basic Halo comms primitive
|
// Basic Halo comms primitive
|
||||||
// Basic Halo comms primitive
|
|
||||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
|
|
||||||
void *xmit,
|
|
||||||
int dest,
|
|
||||||
void *recv,
|
|
||||||
int from,
|
|
||||||
int bytes)
|
|
||||||
{
|
|
||||||
MPI_Request xrq;
|
|
||||||
MPI_Request rrq;
|
|
||||||
int rank = _processor;
|
|
||||||
int ierr;
|
|
||||||
ierr =MPI_Send_init(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
|
||||||
ierr|=MPI_Recv_init(recv, bytes, MPI_CHAR,dest,_processor,communicator,&rrq);
|
|
||||||
assert(ierr==0);
|
|
||||||
list.push_back(xrq);
|
|
||||||
list.push_back(rrq);
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
|
|
||||||
{
|
|
||||||
MPI_Startall(list.size(),&list[0]);
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
void *xmit,
|
void *xmit,
|
||||||
int dest,
|
int dest,
|
||||||
@ -173,12 +151,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
|||||||
int from,
|
int from,
|
||||||
int bytes)
|
int bytes)
|
||||||
{
|
{
|
||||||
std::vector<CommsRequest_t> reqs(0);
|
MPI_Request xrq;
|
||||||
SendToRecvFromInit(reqs,xmit,dest,recv,from,bytes);
|
MPI_Request rrq;
|
||||||
SendToRecvFromBegin(reqs);
|
int rank = _processor;
|
||||||
for(int i=0;i<reqs.size();i++){
|
int ierr;
|
||||||
list.push_back(reqs[i]);
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
}
|
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
|
|
||||||
|
assert(ierr==0);
|
||||||
|
|
||||||
|
list.push_back(xrq);
|
||||||
|
list.push_back(rrq);
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
{
|
{
|
||||||
|
@ -84,19 +84,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
|||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
|
|
||||||
void *xmit,
|
|
||||||
int dest,
|
|
||||||
void *recv,
|
|
||||||
int from,
|
|
||||||
int bytes)
|
|
||||||
{
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
|
|
||||||
{
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
|
@ -268,10 +268,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Basic Halo comms primitive
|
// Basic Halo comms primitive
|
||||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
|
|
||||||
{
|
|
||||||
assert(0); //unimplemented
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
void *xmit,
|
void *xmit,
|
||||||
int dest,
|
int dest,
|
||||||
@ -284,15 +280,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
|||||||
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
|
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
|
||||||
shmem_putmem(recv,xmit,bytes,dest);
|
shmem_putmem(recv,xmit,bytes,dest);
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
|
|
||||||
void *xmit,
|
|
||||||
int dest,
|
|
||||||
void *recv,
|
|
||||||
int from,
|
|
||||||
int bytes)
|
|
||||||
{
|
|
||||||
assert(0); // Unimplemented
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
{
|
{
|
||||||
// shmem_quiet(); // I'm done
|
// shmem_quiet(); // I'm done
|
||||||
|
@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
|
|||||||
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
|
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
|
||||||
}
|
}
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<ig->lSites();idx++){
|
for(int idx=0;idx<ig->lSites();idx++){
|
||||||
std::vector<int> lcoor(ni);
|
std::vector<int> lcoor(ni);
|
||||||
ig->LocalIndexToLocalCoor(idx,lcoor);
|
ig->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
@ -446,6 +446,79 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
GridBase *lg = lowDim._grid;
|
||||||
|
GridBase *hg = higherDim._grid;
|
||||||
|
int nl = lg->_ndimension;
|
||||||
|
int nh = hg->_ndimension;
|
||||||
|
|
||||||
|
assert(nl == nh);
|
||||||
|
assert(orthog<nh);
|
||||||
|
assert(orthog>=0);
|
||||||
|
|
||||||
|
for(int d=0;d<nh;d++){
|
||||||
|
assert(lg->_processors[d] == hg->_processors[d]);
|
||||||
|
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the above should guarantee that the operations are local
|
||||||
|
//PARALLEL_FOR_LOOP
|
||||||
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
|
std::vector<int> lcoor(nl);
|
||||||
|
std::vector<int> hcoor(nh);
|
||||||
|
lg->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
if( lcoor[orthog] == slice_lo ) {
|
||||||
|
hcoor=lcoor;
|
||||||
|
hcoor[orthog] = slice_hi;
|
||||||
|
peekLocalSite(s,lowDim,lcoor);
|
||||||
|
pokeLocalSite(s,higherDim,hcoor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
GridBase *lg = lowDim._grid;
|
||||||
|
GridBase *hg = higherDim._grid;
|
||||||
|
int nl = lg->_ndimension;
|
||||||
|
int nh = hg->_ndimension;
|
||||||
|
|
||||||
|
assert(nl == nh);
|
||||||
|
assert(orthog<nh);
|
||||||
|
assert(orthog>=0);
|
||||||
|
|
||||||
|
for(int d=0;d<nh;d++){
|
||||||
|
assert(lg->_processors[d] == hg->_processors[d]);
|
||||||
|
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the above should guarantee that the operations are local
|
||||||
|
//PARALLEL_FOR_LOOP
|
||||||
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
|
std::vector<int> lcoor(nl);
|
||||||
|
std::vector<int> hcoor(nh);
|
||||||
|
lg->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
if( lcoor[orthog] == slice_lo ) {
|
||||||
|
hcoor=lcoor;
|
||||||
|
hcoor[orthog] = slice_hi;
|
||||||
|
peekLocalSite(s,higherDim,hcoor);
|
||||||
|
pokeLocalSite(s,lowDim,lcoor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
||||||
{
|
{
|
||||||
|
@ -194,22 +194,22 @@ class BinaryIO {
|
|||||||
|
|
||||||
std::vector<int> site({x,y,z,t});
|
std::vector<int> site({x,y,z,t});
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if (grid->IsBoss()) {
|
||||||
fin.read((char *)&file_object,sizeof(file_object));
|
fin.read((char *)&file_object, sizeof(file_object));
|
||||||
bytes += sizeof(file_object);
|
bytes += sizeof(file_object);
|
||||||
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
|
||||||
|
|
||||||
munge(file_object,munged,csum);
|
munge(file_object, munged, csum);
|
||||||
}
|
}
|
||||||
// The boss who read the file has their value poked
|
// The boss who read the file has their value poked
|
||||||
pokeSite(munged,Umu,site);
|
pokeSite(munged,Umu,site);
|
||||||
}}}}
|
}}}}
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -254,20 +254,20 @@ class BinaryIO {
|
|||||||
|
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
|
|
||||||
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
|
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
|
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
|
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
|
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
|
||||||
|
|
||||||
// NB could gather an xstrip as an optimisation.
|
// NB could gather an xstrip as an optimisation.
|
||||||
fout.write((char *)&file_object,sizeof(file_object));
|
fout.write((char *)&file_object,sizeof(file_object));
|
||||||
bytes+=sizeof(file_object);
|
bytes+=sizeof(file_object);
|
||||||
}
|
}
|
||||||
}}}}
|
}}}}
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -305,15 +305,15 @@ class BinaryIO {
|
|||||||
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
||||||
|
|
||||||
if( rank == grid->ThisRank() ){
|
if( rank == grid->ThisRank() ){
|
||||||
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
|
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
|
||||||
parallel.GetState(saved,l_idx);
|
parallel.GetState(saved,l_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Broadcast(rank,(void *)&saved[0],bytes);
|
grid->Broadcast(rank,(void *)&saved[0],bytes);
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
||||||
fout.write((char *)&saved[0],bytes);
|
fout.write((char *)&saved[0],bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -355,14 +355,14 @@ class BinaryIO {
|
|||||||
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
fin.read((char *)&saved[0],bytes);
|
fin.read((char *)&saved[0],bytes);
|
||||||
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Broadcast(0,(void *)&saved[0],bytes);
|
grid->Broadcast(0,(void *)&saved[0],bytes);
|
||||||
|
|
||||||
if( rank == grid->ThisRank() ){
|
if( rank == grid->ThisRank() ){
|
||||||
parallel.SetState(saved,l_idx);
|
parallel.SetState(saved,l_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -415,15 +415,15 @@ class BinaryIO {
|
|||||||
|
|
||||||
if ( d == 0 ) parallel[d] = 0;
|
if ( d == 0 ) parallel[d] = 0;
|
||||||
if (parallel[d]) {
|
if (parallel[d]) {
|
||||||
range[d] = grid->_ldimensions[d];
|
range[d] = grid->_ldimensions[d];
|
||||||
start[d] = grid->_processor_coor[d]*range[d];
|
start[d] = grid->_processor_coor[d]*range[d];
|
||||||
ioproc[d]= grid->_processor_coor[d];
|
ioproc[d]= grid->_processor_coor[d];
|
||||||
} else {
|
} else {
|
||||||
range[d] = grid->_gdimensions[d];
|
range[d] = grid->_gdimensions[d];
|
||||||
start[d] = 0;
|
start[d] = 0;
|
||||||
ioproc[d]= 0;
|
ioproc[d]= 0;
|
||||||
|
|
||||||
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||||
}
|
}
|
||||||
slice_vol = slice_vol * range[d];
|
slice_vol = slice_vol * range[d];
|
||||||
}
|
}
|
||||||
@ -434,9 +434,9 @@ class BinaryIO {
|
|||||||
std::cout<< std::dec ;
|
std::cout<< std::dec ;
|
||||||
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||||
for(int d=0;d<grid->_ndimension;d++){
|
for(int d=0;d<grid->_ndimension;d++){
|
||||||
std::cout<< range[d];
|
std::cout<< range[d];
|
||||||
if( d< grid->_ndimension-1 )
|
if( d< grid->_ndimension-1 )
|
||||||
std::cout<< " x ";
|
std::cout<< " x ";
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
@ -463,7 +463,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
// need to implement these loops in Nd independent way with a lexico conversion
|
// need to implement these loops in Nd independent way with a lexico conversion
|
||||||
for(int tlex=0;tlex<slice_vol;tlex++){
|
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||||
|
|
||||||
std::vector<int> tsite(nd); // temporary mixed up site
|
std::vector<int> tsite(nd); // temporary mixed up site
|
||||||
std::vector<int> gsite(nd);
|
std::vector<int> gsite(nd);
|
||||||
std::vector<int> lsite(nd);
|
std::vector<int> lsite(nd);
|
||||||
@ -472,8 +472,8 @@ class BinaryIO {
|
|||||||
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
||||||
|
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||||
gsite[d] = tsite[d]+start[d]; // global site
|
gsite[d] = tsite[d]+start[d]; // global site
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
@ -487,29 +487,29 @@ class BinaryIO {
|
|||||||
// iorank reads from the seek
|
// iorank reads from the seek
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
if (myrank == iorank) {
|
if (myrank == iorank) {
|
||||||
|
|
||||||
fin.seekg(offset+g_idx*sizeof(fileObj));
|
fin.seekg(offset+g_idx*sizeof(fileObj));
|
||||||
fin.read((char *)&fileObj,sizeof(fileObj));
|
fin.read((char *)&fileObj,sizeof(fileObj));
|
||||||
bytes+=sizeof(fileObj);
|
bytes+=sizeof(fileObj);
|
||||||
|
|
||||||
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
munge(fileObj,siteObj,csum);
|
munge(fileObj,siteObj,csum);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Possibly do transport through pt2pt
|
// Possibly do transport through pt2pt
|
||||||
if ( rank != iorank ) {
|
if ( rank != iorank ) {
|
||||||
if ( (myrank == rank) || (myrank==iorank) ) {
|
if ( (myrank == rank) || (myrank==iorank) ) {
|
||||||
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
|
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Poke at destination
|
// Poke at destination
|
||||||
if ( myrank == rank ) {
|
if ( myrank == rank ) {
|
||||||
pokeLocalSite(siteObj,Umu,lsite);
|
pokeLocalSite(siteObj,Umu,lsite);
|
||||||
}
|
}
|
||||||
grid->Barrier(); // necessary?
|
grid->Barrier(); // necessary?
|
||||||
}
|
}
|
||||||
@ -520,7 +520,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -558,15 +558,15 @@ class BinaryIO {
|
|||||||
if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
|
if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
|
||||||
|
|
||||||
if (parallel[d]) {
|
if (parallel[d]) {
|
||||||
range[d] = grid->_ldimensions[d];
|
range[d] = grid->_ldimensions[d];
|
||||||
start[d] = grid->_processor_coor[d]*range[d];
|
start[d] = grid->_processor_coor[d]*range[d];
|
||||||
ioproc[d]= grid->_processor_coor[d];
|
ioproc[d]= grid->_processor_coor[d];
|
||||||
} else {
|
} else {
|
||||||
range[d] = grid->_gdimensions[d];
|
range[d] = grid->_gdimensions[d];
|
||||||
start[d] = 0;
|
start[d] = 0;
|
||||||
ioproc[d]= 0;
|
ioproc[d]= 0;
|
||||||
|
|
||||||
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
slice_vol = slice_vol * range[d];
|
slice_vol = slice_vol * range[d];
|
||||||
@ -577,9 +577,9 @@ class BinaryIO {
|
|||||||
grid->GlobalSum(tmp);
|
grid->GlobalSum(tmp);
|
||||||
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||||
for(int d=0;d<grid->_ndimension;d++){
|
for(int d=0;d<grid->_ndimension;d++){
|
||||||
std::cout<< range[d];
|
std::cout<< range[d];
|
||||||
if( d< grid->_ndimension-1 )
|
if( d< grid->_ndimension-1 )
|
||||||
std::cout<< " x ";
|
std::cout<< " x ";
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
@ -610,7 +610,7 @@ class BinaryIO {
|
|||||||
// should aggregate a whole chunk and then write.
|
// should aggregate a whole chunk and then write.
|
||||||
// need to implement these loops in Nd independent way with a lexico conversion
|
// need to implement these loops in Nd independent way with a lexico conversion
|
||||||
for(int tlex=0;tlex<slice_vol;tlex++){
|
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||||
|
|
||||||
std::vector<int> tsite(nd); // temporary mixed up site
|
std::vector<int> tsite(nd); // temporary mixed up site
|
||||||
std::vector<int> gsite(nd);
|
std::vector<int> gsite(nd);
|
||||||
std::vector<int> lsite(nd);
|
std::vector<int> lsite(nd);
|
||||||
@ -619,8 +619,8 @@ class BinaryIO {
|
|||||||
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
||||||
|
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||||
gsite[d] = tsite[d]+start[d]; // global site
|
gsite[d] = tsite[d]+start[d]; // global site
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -640,26 +640,26 @@ class BinaryIO {
|
|||||||
|
|
||||||
// Pair of nodes may need to do pt2pt send
|
// Pair of nodes may need to do pt2pt send
|
||||||
if ( rank != iorank ) { // comms is necessary
|
if ( rank != iorank ) { // comms is necessary
|
||||||
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
|
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
|
||||||
// Send to IOrank
|
// Send to IOrank
|
||||||
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
|
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Barrier(); // necessary?
|
grid->Barrier(); // necessary?
|
||||||
|
|
||||||
if (myrank == iorank) {
|
if (myrank == iorank) {
|
||||||
|
|
||||||
munge(siteObj,fileObj,csum);
|
munge(siteObj,fileObj,csum);
|
||||||
|
|
||||||
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
fout.seekp(offset+g_idx*sizeof(fileObj));
|
fout.seekp(offset+g_idx*sizeof(fileObj));
|
||||||
fout.write((char *)&fileObj,sizeof(fileObj));
|
fout.write((char *)&fileObj,sizeof(fileObj));
|
||||||
bytes+=sizeof(fileObj);
|
bytes+=sizeof(fileObj);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -668,7 +668,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
|
@ -55,11 +55,14 @@ namespace QCD {
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// QCD iMatrix types
|
// QCD iMatrix types
|
||||||
// Index conventions: Lorentz x Spin x Colour
|
// Index conventions: Lorentz x Spin x Colour
|
||||||
|
// note: static const int or constexpr will work for type deductions
|
||||||
|
// with the intel compiler (up to version 17)
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
static const int ColourIndex = 2;
|
#define ColourIndex 2
|
||||||
static const int SpinIndex = 1;
|
#define SpinIndex 1
|
||||||
static const int LorentzIndex= 0;
|
#define LorentzIndex 0
|
||||||
|
|
||||||
|
|
||||||
// Also should make these a named enum type
|
// Also should make these a named enum type
|
||||||
static const int DaggerNo=0;
|
static const int DaggerNo=0;
|
||||||
static const int DaggerYes=1;
|
static const int DaggerYes=1;
|
||||||
|
@ -111,12 +111,16 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
|
|||||||
#define FermOp4dVecTemplateInstantiate(A) \
|
#define FermOp4dVecTemplateInstantiate(A) \
|
||||||
template class A<WilsonImplF>; \
|
template class A<WilsonImplF>; \
|
||||||
template class A<WilsonImplD>; \
|
template class A<WilsonImplD>; \
|
||||||
|
template class A<ZWilsonImplF>; \
|
||||||
|
template class A<ZWilsonImplD>; \
|
||||||
template class A<GparityWilsonImplF>; \
|
template class A<GparityWilsonImplF>; \
|
||||||
template class A<GparityWilsonImplD>;
|
template class A<GparityWilsonImplD>;
|
||||||
|
|
||||||
#define FermOp5dVecTemplateInstantiate(A) \
|
#define FermOp5dVecTemplateInstantiate(A) \
|
||||||
template class A<DomainWallVec5dImplF>; \
|
template class A<DomainWallVec5dImplF>; \
|
||||||
template class A<DomainWallVec5dImplD>;
|
template class A<DomainWallVec5dImplD>; \
|
||||||
|
template class A<ZDomainWallVec5dImplF>; \
|
||||||
|
template class A<ZDomainWallVec5dImplD>;
|
||||||
|
|
||||||
#define FermOpTemplateInstantiate(A) \
|
#define FermOpTemplateInstantiate(A) \
|
||||||
FermOp4dVecTemplateInstantiate(A) \
|
FermOp4dVecTemplateInstantiate(A) \
|
||||||
@ -138,6 +142,7 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
|
|||||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
||||||
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
||||||
#include <Grid/qcd/action/fermion/MobiusFermion.h>
|
#include <Grid/qcd/action/fermion/MobiusFermion.h>
|
||||||
|
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
|
||||||
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
|
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
|
||||||
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
|
||||||
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
|
||||||
@ -176,6 +181,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
|
|||||||
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
||||||
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
||||||
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
||||||
|
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
|
||||||
|
|
||||||
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
||||||
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
||||||
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
|
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
|
||||||
|
@ -54,18 +54,18 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
std::vector<RealD> diag (Ls,1.0);
|
std::vector<Coeff_t> diag (Ls,1.0);
|
||||||
std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass;
|
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
|
||||||
std::vector<RealD> lower(Ls,-1.0); lower[0] =mass;
|
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
|
||||||
M5D(psi,chi,chi,lower,diag,upper);
|
M5D(psi,chi,chi,lower,diag,upper);
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
|
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
std::vector<RealD> diag = bs;
|
std::vector<Coeff_t> diag = bs;
|
||||||
std::vector<RealD> upper= cs;
|
std::vector<Coeff_t> upper= cs;
|
||||||
std::vector<RealD> lower= cs;
|
std::vector<Coeff_t> lower= cs;
|
||||||
upper[Ls-1]=-mass*upper[Ls-1];
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
lower[0] =-mass*lower[0];
|
lower[0] =-mass*lower[0];
|
||||||
M5D(psi,psi,Din,lower,diag,upper);
|
M5D(psi,psi,Din,lower,diag,upper);
|
||||||
@ -73,9 +73,9 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
|
|||||||
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
|
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
std::vector<RealD> diag = beo;
|
std::vector<Coeff_t> diag = beo;
|
||||||
std::vector<RealD> upper(Ls);
|
std::vector<Coeff_t> upper(Ls);
|
||||||
std::vector<RealD> lower(Ls);
|
std::vector<Coeff_t> lower(Ls);
|
||||||
for(int i=0;i<Ls;i++) {
|
for(int i=0;i<Ls;i++) {
|
||||||
upper[i]=-ceo[i];
|
upper[i]=-ceo[i];
|
||||||
lower[i]=-ceo[i];
|
lower[i]=-ceo[i];
|
||||||
@ -88,9 +88,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
std::vector<RealD> diag = bee;
|
std::vector<Coeff_t> diag = bee;
|
||||||
std::vector<RealD> upper(Ls);
|
std::vector<Coeff_t> upper(Ls);
|
||||||
std::vector<RealD> lower(Ls);
|
std::vector<Coeff_t> lower(Ls);
|
||||||
for(int i=0;i<Ls;i++) {
|
for(int i=0;i<Ls;i++) {
|
||||||
upper[i]=-cee[i];
|
upper[i]=-cee[i];
|
||||||
lower[i]=-cee[i];
|
lower[i]=-cee[i];
|
||||||
@ -104,9 +104,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
std::vector<RealD> diag = bee;
|
std::vector<Coeff_t> diag = bee;
|
||||||
std::vector<RealD> upper(Ls);
|
std::vector<Coeff_t> upper(Ls);
|
||||||
std::vector<RealD> lower(Ls);
|
std::vector<Coeff_t> lower(Ls);
|
||||||
|
|
||||||
for (int s=0;s<Ls;s++){
|
for (int s=0;s<Ls;s++){
|
||||||
// Assemble the 5d matrix
|
// Assemble the 5d matrix
|
||||||
@ -129,9 +129,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
std::vector<RealD> diag(Ls,1.0);
|
std::vector<Coeff_t> diag(Ls,1.0);
|
||||||
std::vector<RealD> upper(Ls,-1.0);
|
std::vector<Coeff_t> upper(Ls,-1.0);
|
||||||
std::vector<RealD> lower(Ls,-1.0);
|
std::vector<Coeff_t> lower(Ls,-1.0);
|
||||||
upper[Ls-1]=-mass*upper[Ls-1];
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
lower[0] =-mass*lower[0];
|
lower[0] =-mass*lower[0];
|
||||||
M5Ddag(psi,chi,chi,lower,diag,upper);
|
M5Ddag(psi,chi,chi,lower,diag,upper);
|
||||||
@ -141,9 +141,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
|
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
std::vector<RealD> diag =bs;
|
std::vector<Coeff_t> diag =bs;
|
||||||
std::vector<RealD> upper=cs;
|
std::vector<Coeff_t> upper=cs;
|
||||||
std::vector<RealD> lower=cs;
|
std::vector<Coeff_t> lower=cs;
|
||||||
upper[Ls-1]=-mass*upper[Ls-1];
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
lower[0] =-mass*lower[0];
|
lower[0] =-mass*lower[0];
|
||||||
M5Ddag(psi,psi,Din,lower,diag,upper);
|
M5Ddag(psi,psi,Din,lower,diag,upper);
|
||||||
@ -273,11 +273,21 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
|
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
|
||||||
{
|
{
|
||||||
SetCoefficientsZolotarev(1.0,zdata,b,c);
|
std::vector<Coeff_t> gamma(this->Ls);
|
||||||
|
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
|
||||||
|
SetCoefficientsInternal(1.0,gamma,b,c);
|
||||||
}
|
}
|
||||||
//Zolo
|
//Zolo
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
|
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
|
||||||
|
{
|
||||||
|
std::vector<Coeff_t> gamma(this->Ls);
|
||||||
|
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
|
||||||
|
SetCoefficientsInternal(zolo_hi,gamma,b,c);
|
||||||
|
}
|
||||||
|
//Zolo
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
|
|
||||||
@ -315,7 +325,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
|
|||||||
double bmc = b-c;
|
double bmc = b-c;
|
||||||
for(int i=0; i < Ls; i++){
|
for(int i=0; i < Ls; i++){
|
||||||
as[i] = 1.0;
|
as[i] = 1.0;
|
||||||
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
|
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
|
||||||
bs[i] = 0.5*(bpc/omega[i] + bmc);
|
bs[i] = 0.5*(bpc/omega[i] + bmc);
|
||||||
cs[i] = 0.5*(bpc/omega[i] - bmc);
|
cs[i] = 0.5*(bpc/omega[i] - bmc);
|
||||||
}
|
}
|
||||||
@ -377,7 +387,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
double delta_d=mass*cee[Ls-1];
|
Coeff_t delta_d=mass*cee[Ls-1];
|
||||||
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
|
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
|
||||||
dee[Ls-1] += delta_d;
|
dee[Ls-1] += delta_d;
|
||||||
}
|
}
|
||||||
|
@ -62,16 +62,16 @@ namespace Grid {
|
|||||||
void M5D(const FermionField &psi,
|
void M5D(const FermionField &psi,
|
||||||
const FermionField &phi,
|
const FermionField &phi,
|
||||||
FermionField &chi,
|
FermionField &chi,
|
||||||
std::vector<RealD> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<RealD> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<RealD> &upper);
|
std::vector<Coeff_t> &upper);
|
||||||
|
|
||||||
void M5Ddag(const FermionField &psi,
|
void M5Ddag(const FermionField &psi,
|
||||||
const FermionField &phi,
|
const FermionField &phi,
|
||||||
FermionField &chi,
|
FermionField &chi,
|
||||||
std::vector<RealD> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<RealD> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<RealD> &upper);
|
std::vector<Coeff_t> &upper);
|
||||||
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
|
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
|
||||||
|
|
||||||
virtual void Instantiatable(void)=0;
|
virtual void Instantiatable(void)=0;
|
||||||
@ -91,23 +91,23 @@ namespace Grid {
|
|||||||
RealD mass;
|
RealD mass;
|
||||||
|
|
||||||
// Cayley form Moebius (tanh and zolotarev)
|
// Cayley form Moebius (tanh and zolotarev)
|
||||||
std::vector<RealD> omega;
|
std::vector<Coeff_t> omega;
|
||||||
std::vector<RealD> bs; // S dependent coeffs
|
std::vector<Coeff_t> bs; // S dependent coeffs
|
||||||
std::vector<RealD> cs;
|
std::vector<Coeff_t> cs;
|
||||||
std::vector<RealD> as;
|
std::vector<Coeff_t> as;
|
||||||
// For preconditioning Cayley form
|
// For preconditioning Cayley form
|
||||||
std::vector<RealD> bee;
|
std::vector<Coeff_t> bee;
|
||||||
std::vector<RealD> cee;
|
std::vector<Coeff_t> cee;
|
||||||
std::vector<RealD> aee;
|
std::vector<Coeff_t> aee;
|
||||||
std::vector<RealD> beo;
|
std::vector<Coeff_t> beo;
|
||||||
std::vector<RealD> ceo;
|
std::vector<Coeff_t> ceo;
|
||||||
std::vector<RealD> aeo;
|
std::vector<Coeff_t> aeo;
|
||||||
// LDU factorisation of the eeoo matrix
|
// LDU factorisation of the eeoo matrix
|
||||||
std::vector<RealD> lee;
|
std::vector<Coeff_t> lee;
|
||||||
std::vector<RealD> leem;
|
std::vector<Coeff_t> leem;
|
||||||
std::vector<RealD> uee;
|
std::vector<Coeff_t> uee;
|
||||||
std::vector<RealD> ueem;
|
std::vector<Coeff_t> ueem;
|
||||||
std::vector<RealD> dee;
|
std::vector<Coeff_t> dee;
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
CayleyFermion5D(GaugeField &_Umu,
|
CayleyFermion5D(GaugeField &_Umu,
|
||||||
@ -117,20 +117,19 @@ namespace Grid {
|
|||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||||
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||||
|
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#define INSTANTIATE_DPERP(A)\
|
#define INSTANTIATE_DPERP(A)\
|
||||||
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
|
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
|
||||||
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \
|
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
|
||||||
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
|
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
|
||||||
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \
|
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
|
||||||
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
|
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
|
||||||
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
|
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
|
||||||
|
|
||||||
|
@ -43,9 +43,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
const FermionField &phi,
|
const FermionField &phi,
|
||||||
FermionField &chi,
|
FermionField &chi,
|
||||||
std::vector<RealD> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<RealD> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<RealD> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
int Ls =this->Ls;
|
int Ls =this->Ls;
|
||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
@ -82,9 +82,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
const FermionField &phi,
|
const FermionField &phi,
|
||||||
FermionField &chi,
|
FermionField &chi,
|
||||||
std::vector<RealD> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<RealD> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<RealD> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
int Ls =this->Ls;
|
int Ls =this->Ls;
|
||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
@ -204,6 +204,8 @@ PARALLEL_FOR_LOOP
|
|||||||
INSTANTIATE_DPERP(WilsonImplD);
|
INSTANTIATE_DPERP(WilsonImplD);
|
||||||
INSTANTIATE_DPERP(GparityWilsonImplF);
|
INSTANTIATE_DPERP(GparityWilsonImplF);
|
||||||
INSTANTIATE_DPERP(GparityWilsonImplD);
|
INSTANTIATE_DPERP(GparityWilsonImplD);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplF);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplD);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -43,9 +43,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
const FermionField &phi,
|
const FermionField &phi,
|
||||||
FermionField &chi,
|
FermionField &chi,
|
||||||
std::vector<RealD> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<RealD> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<RealD> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
@ -65,9 +65,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
const FermionField &phi,
|
const FermionField &phi,
|
||||||
FermionField &chi,
|
FermionField &chi,
|
||||||
std::vector<RealD> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<RealD> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<RealD> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
|
@ -53,9 +53,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
const FermionField &phi,
|
const FermionField &phi,
|
||||||
FermionField &chi,
|
FermionField &chi,
|
||||||
std::vector<RealD> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<RealD> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<RealD> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
@ -121,9 +121,9 @@ template<class Impl>
|
|||||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
const FermionField &phi,
|
const FermionField &phi,
|
||||||
FermionField &chi,
|
FermionField &chi,
|
||||||
std::vector<RealD> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<RealD> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<RealD> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
@ -194,8 +194,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
|
|||||||
|
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
|
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||||
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
|
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||||
|
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
Pplus(s,s) = bee[s];
|
Pplus(s,s) = bee[s];
|
||||||
@ -212,8 +212,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
|
|||||||
Pplus (0,Ls-1) = mass*cee[0];
|
Pplus (0,Ls-1) = mass*cee[0];
|
||||||
Pminus(Ls-1,0) = mass*cee[Ls-1];
|
Pminus(Ls-1,0) = mass*cee[Ls-1];
|
||||||
|
|
||||||
Eigen::MatrixXd PplusMat ;
|
Eigen::MatrixXcd PplusMat ;
|
||||||
Eigen::MatrixXd PminusMat;
|
Eigen::MatrixXcd PminusMat;
|
||||||
|
|
||||||
if ( inv ) {
|
if ( inv ) {
|
||||||
PplusMat =Pplus.inverse();
|
PplusMat =Pplus.inverse();
|
||||||
@ -298,8 +298,12 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
INSTANTIATE_DPERP(DomainWallVec5dImplD);
|
INSTANTIATE_DPERP(DomainWallVec5dImplD);
|
||||||
INSTANTIATE_DPERP(DomainWallVec5dImplF);
|
INSTANTIATE_DPERP(DomainWallVec5dImplF);
|
||||||
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
|
||||||
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
|
||||||
|
|
||||||
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
@ -100,7 +100,8 @@ namespace Grid {
|
|||||||
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||||
typedef typename Impl::Compressor Compressor; \
|
typedef typename Impl::Compressor Compressor; \
|
||||||
typedef typename Impl::StencilImpl StencilImpl; \
|
typedef typename Impl::StencilImpl StencilImpl; \
|
||||||
typedef typename Impl::ImplParams ImplParams;
|
typedef typename Impl::ImplParams ImplParams; \
|
||||||
|
typedef typename Impl::Coeff_t Coeff_t;
|
||||||
|
|
||||||
#define INHERIT_IMPL_TYPES(Base) \
|
#define INHERIT_IMPL_TYPES(Base) \
|
||||||
INHERIT_GIMPL_TYPES(Base)\
|
INHERIT_GIMPL_TYPES(Base)\
|
||||||
@ -109,12 +110,14 @@ namespace Grid {
|
|||||||
///////
|
///////
|
||||||
// Single flavour four spinors with colour index
|
// Single flavour four spinors with colour index
|
||||||
///////
|
///////
|
||||||
template<class S,int Nrepresentation=Nc>
|
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
|
||||||
class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > {
|
class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|
||||||
const bool LsVectorised=false;
|
const bool LsVectorised=false;
|
||||||
|
|
||||||
|
typedef _Coeff_t Coeff_t;
|
||||||
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
@ -192,12 +195,13 @@ PARALLEL_FOR_LOOP
|
|||||||
///////
|
///////
|
||||||
// Single flavour four spinors with colour index, 5d redblack
|
// Single flavour four spinors with colour index, 5d redblack
|
||||||
///////
|
///////
|
||||||
template<class S,int Nrepresentation=Nc>
|
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
|
||||||
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
const bool LsVectorised=true;
|
const bool LsVectorised=true;
|
||||||
|
|
||||||
|
typedef _Coeff_t Coeff_t;
|
||||||
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
@ -287,12 +291,13 @@ PARALLEL_FOR_LOOP
|
|||||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||||
////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template<class S,int Nrepresentation>
|
template<class S,int Nrepresentation,class _Coeff_t = RealD>
|
||||||
class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{
|
class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
const bool LsVectorised=false;
|
const bool LsVectorised=false;
|
||||||
|
|
||||||
|
typedef _Coeff_t Coeff_t;
|
||||||
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
@ -483,6 +488,18 @@ PARALLEL_FOR_LOOP
|
|||||||
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
|
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
|
||||||
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
|
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
|
||||||
|
|
||||||
|
typedef WilsonImpl<vComplex ,Nc,ComplexD> ZWilsonImplR; // Real.. whichever prec
|
||||||
|
typedef WilsonImpl<vComplexF,Nc,ComplexD> ZWilsonImplF; // Float
|
||||||
|
typedef WilsonImpl<vComplexD,Nc,ComplexD> ZWilsonImplD; // Double
|
||||||
|
|
||||||
|
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
|
||||||
|
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
|
||||||
|
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
|
||||||
|
|
||||||
|
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||||
|
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
|
||||||
|
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
|
||||||
|
|
||||||
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
|
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
|
||||||
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
|
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
|
||||||
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
|
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
|
||||||
|
@ -68,16 +68,21 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,
|
|||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
|
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
// No asm implementation yet.
|
#ifdef AVX512
|
||||||
// if ( AsmOpt ) WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
if ( AsmOpt ) {
|
||||||
// else
|
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||||
for(int site=0;site<Ns;site++) {
|
} else {
|
||||||
for(int s=0;s<Ls;s++) {
|
#else
|
||||||
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
{
|
||||||
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
#endif
|
||||||
sF++;
|
for(int site=0;site<Ns;site++) {
|
||||||
|
for(int s=0;s<Ls;s++) {
|
||||||
|
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||||
|
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
}
|
}
|
||||||
sU++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,6 +79,10 @@ namespace Grid {
|
|||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
|
||||||
void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
@ -92,7 +96,25 @@ namespace Grid {
|
|||||||
WilsonKernels(const ImplParams &p= ImplParams());
|
WilsonKernels(const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
// Default to no assembler implementation
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -26,59 +26,56 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
|
||||||
// Default to no assembler implementation
|
|
||||||
///////////////////////////////////////////////////////////
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(AVX512)
|
#if defined(AVX512)
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
// If we are AVX512 specialise the single precision routine
|
// If we are AVX512 specialise the single precision routine
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#include <simd/Intel512wilson.h>
|
#include <simd/Intel512wilson.h>
|
||||||
#include <simd/Intel512single.h>
|
#include <simd/Intel512single.h>
|
||||||
|
|
||||||
static Vector<vComplexF> signs;
|
static Vector<vComplexF> signs;
|
||||||
|
|
||||||
int setupSigns(void ){
|
int setupSigns(void ){
|
||||||
Vector<vComplexF> bother(2);
|
Vector<vComplexF> bother(2);
|
||||||
signs = bother;
|
signs = bother;
|
||||||
vrsign(signs[0]);
|
vrsign(signs[0]);
|
||||||
visign(signs[1]);
|
visign(signs[1]);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
static int signInit = setupSigns();
|
static int signInit = setupSigns();
|
||||||
|
|
||||||
#define label(A) ilabel(A)
|
#define label(A) ilabel(A)
|
||||||
#define ilabel(A) ".globl\n" #A ":\n"
|
#define ilabel(A) ".globl\n" #A ":\n"
|
||||||
|
|
||||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
||||||
#define FX(A) WILSONASM_ ##A
|
#define FX(A) WILSONASM_ ##A
|
||||||
template<>
|
|
||||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
#undef KERNEL_DAG
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template<>
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
|
#define KERNEL_DAG
|
||||||
|
template<>
|
||||||
|
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
|
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#undef VMOVIDUP
|
#undef VMOVIDUP
|
||||||
#undef VMOVRDUP
|
#undef VMOVRDUP
|
||||||
#undef MAYBEPERM
|
#undef MAYBEPERM
|
||||||
@ -89,32 +86,22 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd
|
|||||||
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
|
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
|
||||||
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
|
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
|
||||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
||||||
template<>
|
|
||||||
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
#undef KERNEL_DAG
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template<>
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
|
#define KERNEL_DAG
|
||||||
|
template<>
|
||||||
|
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||||
|
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
}
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
template void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
|
||||||
}}
|
|
||||||
|
|
||||||
|
@ -30,7 +30,11 @@
|
|||||||
basep = st.GetPFInfo(nent,plocal); nent++;
|
basep = st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns);
|
LOAD64(%r10,isigns);
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
XP_PROJMEM(base);
|
||||||
|
#else
|
||||||
XM_PROJMEM(base);
|
XM_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR3,perm);
|
MAYBEPERM(PERMUTE_DIR3,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -41,15 +45,22 @@
|
|||||||
MULT_2SPIN_DIR_PFXP(Xp,basep);
|
MULT_2SPIN_DIR_PFXP(Xp,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns);
|
LOAD64(%r10,isigns);
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
XP_RECON;
|
||||||
|
#else
|
||||||
XM_RECON;
|
XM_RECON;
|
||||||
|
#endif
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Yp
|
// Yp
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
basep = st.GetPFInfo(nent,plocal); nent++;
|
basep = st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
YP_PROJMEM(base);
|
||||||
|
#else
|
||||||
YM_PROJMEM(base);
|
YM_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR2,perm);
|
MAYBEPERM(PERMUTE_DIR2,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -60,7 +71,11 @@
|
|||||||
MULT_2SPIN_DIR_PFYP(Yp,basep);
|
MULT_2SPIN_DIR_PFYP(Yp,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
YP_RECON_ACCUM;
|
||||||
|
#else
|
||||||
YM_RECON_ACCUM;
|
YM_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Zp
|
// Zp
|
||||||
@ -68,7 +83,11 @@
|
|||||||
basep = st.GetPFInfo(nent,plocal); nent++;
|
basep = st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
ZP_PROJMEM(base);
|
||||||
|
#else
|
||||||
ZM_PROJMEM(base);
|
ZM_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR1,perm);
|
MAYBEPERM(PERMUTE_DIR1,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -79,7 +98,11 @@
|
|||||||
MULT_2SPIN_DIR_PFZP(Zp,basep);
|
MULT_2SPIN_DIR_PFZP(Zp,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
ZP_RECON_ACCUM;
|
||||||
|
#else
|
||||||
ZM_RECON_ACCUM;
|
ZM_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Tp
|
// Tp
|
||||||
@ -87,7 +110,11 @@
|
|||||||
basep = st.GetPFInfo(nent,plocal); nent++;
|
basep = st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
TP_PROJMEM(base);
|
||||||
|
#else
|
||||||
TM_PROJMEM(base);
|
TM_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR0,perm);
|
MAYBEPERM(PERMUTE_DIR0,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -98,7 +125,11 @@
|
|||||||
MULT_2SPIN_DIR_PFTP(Tp,basep);
|
MULT_2SPIN_DIR_PFTP(Tp,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
TP_RECON_ACCUM;
|
||||||
|
#else
|
||||||
TM_RECON_ACCUM;
|
TM_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Xm
|
// Xm
|
||||||
@ -107,7 +138,11 @@
|
|||||||
// basep= st.GetPFInfo(nent,plocal); nent++;
|
// basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
XM_PROJMEM(base);
|
||||||
|
#else
|
||||||
XP_PROJMEM(base);
|
XP_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR3,perm);
|
MAYBEPERM(PERMUTE_DIR3,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -118,7 +153,11 @@
|
|||||||
MULT_2SPIN_DIR_PFXM(Xm,basep);
|
MULT_2SPIN_DIR_PFXM(Xm,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
XM_RECON_ACCUM;
|
||||||
|
#else
|
||||||
XP_RECON_ACCUM;
|
XP_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Ym
|
// Ym
|
||||||
@ -126,7 +165,11 @@
|
|||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
YM_PROJMEM(base);
|
||||||
|
#else
|
||||||
YP_PROJMEM(base);
|
YP_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR2,perm);
|
MAYBEPERM(PERMUTE_DIR2,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -137,7 +180,11 @@
|
|||||||
MULT_2SPIN_DIR_PFYM(Ym,basep);
|
MULT_2SPIN_DIR_PFYM(Ym,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
YM_RECON_ACCUM;
|
||||||
|
#else
|
||||||
YP_RECON_ACCUM;
|
YP_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Zm
|
// Zm
|
||||||
@ -145,7 +192,11 @@
|
|||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
ZM_PROJMEM(base);
|
||||||
|
#else
|
||||||
ZP_PROJMEM(base);
|
ZP_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR1,perm);
|
MAYBEPERM(PERMUTE_DIR1,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -156,7 +207,11 @@
|
|||||||
MULT_2SPIN_DIR_PFZM(Zm,basep);
|
MULT_2SPIN_DIR_PFZM(Zm,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
ZM_RECON_ACCUM;
|
||||||
|
#else
|
||||||
ZP_RECON_ACCUM;
|
ZP_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Tm
|
// Tm
|
||||||
@ -164,7 +219,11 @@
|
|||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
TM_PROJMEM(base);
|
||||||
|
#else
|
||||||
TP_PROJMEM(base);
|
TP_PROJMEM(base);
|
||||||
|
#endif
|
||||||
MAYBEPERM(PERMUTE_DIR0,perm);
|
MAYBEPERM(PERMUTE_DIR0,perm);
|
||||||
} else {
|
} else {
|
||||||
LOAD_CHI(base);
|
LOAD_CHI(base);
|
||||||
@ -175,7 +234,11 @@
|
|||||||
MULT_2SPIN_DIR_PFTM(Tm,basep);
|
MULT_2SPIN_DIR_PFTM(Tm,basep);
|
||||||
}
|
}
|
||||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||||
|
#ifdef KERNEL_DAG
|
||||||
|
TM_RECON_ACCUM;
|
||||||
|
#else
|
||||||
TP_RECON_ACCUM;
|
TP_RECON_ACCUM;
|
||||||
|
#endif
|
||||||
|
|
||||||
basep= st.GetPFInfo(nent,plocal); nent++;
|
basep= st.GetPFInfo(nent,plocal); nent++;
|
||||||
SAVE_RESULT(base,basep);
|
SAVE_RESULT(base,basep);
|
||||||
|
@ -839,46 +839,23 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
|
|||||||
////////////// Wilson ; uses this implementation /////////////////////
|
////////////// Wilson ; uses this implementation /////////////////////
|
||||||
// Need Nc=3 though //
|
// Need Nc=3 though //
|
||||||
|
|
||||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
#define INSTANTIATE_THEM(A) \
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
|
||||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
int ss,int sU,const FermionField &in, FermionField &out);\
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
|
||||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
int ss,int sU,const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
INSTANTIATE_THEM(WilsonImplF);
|
||||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
INSTANTIATE_THEM(WilsonImplD);
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
INSTANTIATE_THEM(ZWilsonImplF);
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
INSTANTIATE_THEM(ZWilsonImplD);
|
||||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
INSTANTIATE_THEM(GparityWilsonImplF);
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
INSTANTIATE_THEM(GparityWilsonImplD);
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
INSTANTIATE_THEM(DomainWallVec5dImplF);
|
||||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
INSTANTIATE_THEM(DomainWallVec5dImplD);
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
|
||||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
|
|
||||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
79
lib/qcd/action/fermion/ZMobiusFermion.h
Normal file
79
lib/qcd/action/fermion/ZMobiusFermion.h
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/ZMobiusFermion.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
|
||||||
|
#define GRID_QCD_ZMOBIUS_FERMION_H
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
class ZMobiusFermion : public CayleyFermion5D<Impl>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
public:
|
||||||
|
|
||||||
|
virtual void Instantiatable(void) {};
|
||||||
|
// Constructors
|
||||||
|
ZMobiusFermion(GaugeField &_Umu,
|
||||||
|
GridCartesian &FiveDimGrid,
|
||||||
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
|
GridCartesian &FourDimGrid,
|
||||||
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
|
RealD _mass,RealD _M5,
|
||||||
|
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
|
CayleyFermion5D<Impl>(_Umu,
|
||||||
|
FiveDimGrid,
|
||||||
|
FiveDimRedBlackGrid,
|
||||||
|
FourDimGrid,
|
||||||
|
FourDimRedBlackGrid,_mass,_M5,p)
|
||||||
|
|
||||||
|
{
|
||||||
|
RealD eps = 1.0;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
|
||||||
|
std::vector<Coeff_t> zgamma(this->Ls);
|
||||||
|
for(int s=0;s<this->Ls;s++){
|
||||||
|
zgamma[s] = gamma[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call base setter
|
||||||
|
this->SetCoefficientsInternal(1.0,zgamma,b,c);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -1,300 +1,421 @@
|
|||||||
/*************************************************************************************
|
/*******************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/simd/Grid_qpx.h
|
Source file: ./lib/simd/Grid_qpx.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2016
|
||||||
|
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
******************************************************************************/
|
||||||
/* END LEGAL */
|
|
||||||
//----------------------------------------------------------------------
|
|
||||||
/*! @file Grid_qpx.h
|
|
||||||
@brief Optimization libraries for QPX instructions set for BG/Q
|
|
||||||
|
|
||||||
Using intrinsics
|
|
||||||
*/
|
|
||||||
// Time-stamp: <2015-05-27 11:30:21 neo>
|
|
||||||
//----------------------------------------------------------------------
|
|
||||||
|
|
||||||
// lot of undefined functions
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
namespace Optimization {
|
namespace Optimization {
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
float v0,v1,v2,v3;
|
||||||
|
} vector4float;
|
||||||
|
|
||||||
|
inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
|
||||||
|
{
|
||||||
|
stream << "{"<<vec_extract(a,0)<<","<<vec_extract(a,1)<<","<<vec_extract(a,2)<<","<<vec_extract(a,3)<<"}";
|
||||||
|
return stream;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline std::ostream & operator<<(std::ostream& stream, const vector4float a)
|
||||||
|
{
|
||||||
|
stream << "{"<< a.v0 <<","<< a.v1 <<","<< a.v2 <<","<< a.v3 <<"}";
|
||||||
|
return stream;
|
||||||
|
};
|
||||||
|
|
||||||
struct Vsplat{
|
struct Vsplat{
|
||||||
//Complex float
|
//Complex float
|
||||||
inline float operator()(float a, float b){
|
inline vector4float operator()(float a, float b){
|
||||||
return {a,b,a,b};
|
return (vector4float){a, b, a, b};
|
||||||
}
|
}
|
||||||
// Real float
|
// Real float
|
||||||
inline float operator()(float a){
|
inline vector4float operator()(float a){
|
||||||
return {a,a,a,a};
|
return (vector4float){a, a, a, a};
|
||||||
}
|
}
|
||||||
//Complex double
|
//Complex double
|
||||||
inline vector4double operator()(double a, double b){
|
inline vector4double operator()(double a, double b){
|
||||||
return {a,b,a,b};
|
return (vector4double){a, b, a, b};
|
||||||
}
|
}
|
||||||
//Real double
|
//Real double
|
||||||
inline vector4double operator()(double a){
|
inline vector4double operator()(double a){
|
||||||
return {a,a,a,a};
|
return (vector4double){a, a, a, a};
|
||||||
}
|
}
|
||||||
//Integer
|
//Integer
|
||||||
inline int operator()(Integer a){
|
inline int operator()(Integer a){
|
||||||
#error
|
return a;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Vstore{
|
struct Vstore{
|
||||||
//Float
|
//Float
|
||||||
inline void operator()(float a, float* F){
|
inline void operator()(vector4double a, float *f){
|
||||||
assert(0);
|
vec_st(a, 0, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void operator()(vector4double a, vector4float &f){
|
||||||
|
vec_st(a, 0, (float *)(&f));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void operator()(vector4float a, float *f){
|
||||||
|
f[0] = a.v0;
|
||||||
|
f[1] = a.v1;
|
||||||
|
f[2] = a.v2;
|
||||||
|
f[3] = a.v3;
|
||||||
|
}
|
||||||
|
|
||||||
//Double
|
//Double
|
||||||
inline void operator()(vector4double a, double* D){
|
inline void operator()(vector4double a, double *d){
|
||||||
assert(0);
|
vec_st(a, 0, d);
|
||||||
}
|
}
|
||||||
//Integer
|
//Integer
|
||||||
inline void operator()(int a, Integer* I){
|
inline void operator()(int a, Integer *i){
|
||||||
assert(0);
|
i[0] = a;
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct Vstream{
|
struct Vstream{
|
||||||
//Float
|
//Float
|
||||||
inline void operator()(float * a, float b){
|
inline void operator()(float *f, vector4double a){
|
||||||
assert(0);
|
vec_st(a, 0, f);
|
||||||
}
|
|
||||||
//Double
|
|
||||||
inline void operator()(double * a, vector4double b){
|
|
||||||
assert(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void operator()(vector4float f, vector4double a){
|
||||||
|
vec_st(a, 0, (float *)(&f));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void operator()(float *f, vector4float a){
|
||||||
|
f[0] = a.v0;
|
||||||
|
f[1] = a.v1;
|
||||||
|
f[2] = a.v2;
|
||||||
|
f[3] = a.v3;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Double
|
||||||
|
inline void operator()(double *d, vector4double a){
|
||||||
|
vec_st(a, 0, d);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct Vset{
|
struct Vset{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline float operator()(Grid::ComplexF *a){
|
inline vector4float operator()(Grid::ComplexF *a){
|
||||||
return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()};
|
return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()};
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vector4double operator()(Grid::ComplexD *a){
|
inline vector4double operator()(Grid::ComplexD *a){
|
||||||
return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()};
|
return vec_ld(0, (double *)a);
|
||||||
}
|
}
|
||||||
// Real float
|
|
||||||
inline float operator()(float *a){
|
// Real float
|
||||||
return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]};
|
inline vector4float operator()(float *a){
|
||||||
|
return (vector4float){a[0], a[1], a[2], a[3]};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline vector4double operator()(vector4float a){
|
||||||
|
return vec_ld(0, (float *)(&a));
|
||||||
|
}
|
||||||
|
|
||||||
// Real double
|
// Real double
|
||||||
inline vector4double operator()(double *a){
|
inline vector4double operator()(double *a){
|
||||||
return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]};
|
return vec_ld(0, a);
|
||||||
}
|
}
|
||||||
// Integer
|
// Integer
|
||||||
inline int operator()(Integer *a){
|
inline int operator()(Integer *a){
|
||||||
#error
|
return a[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Out_type, typename In_type>
|
template <typename Out_type, typename In_type>
|
||||||
struct Reduce{
|
struct Reduce{
|
||||||
//Need templated class to overload output type
|
//Need templated class to overload output type
|
||||||
//General form must generate error if compiled
|
//General form must generate error if compiled
|
||||||
inline Out_type operator()(In_type in){
|
inline Out_type operator()(In_type in){
|
||||||
printf("Error, using wrong Reduce function\n");
|
printf("Error, using wrong Reduce function\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
/////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////
|
||||||
|
// Generates the vector4float overload of a two-argument operation from its
// vector4double counterpart: both inputs are converted with Vset, the
// double-precision `fn` is applied, and the result is written back into a
// vector4float with Vstore. `pref` supplies the declaration specifiers
// placed in front of the generated function (e.g. `inline`).
#define FLOAT_WRAP_2(fn, pref)\
  pref vector4float fn(vector4float a, vector4float b)\
  {\
    vector4double lhs_d = Vset()(a);\
    vector4double rhs_d = Vset()(b);\
    vector4double out_d = fn(lhs_d, rhs_d);\
    vector4float  out_f;\
    \
    Vstore()(out_d, out_f);\
    \
    return out_f;\
  }
|
||||||
|
|
||||||
|
// Generates the vector4float overload of a one-argument operation from its
// vector4double counterpart: the input is converted with Vset, the
// double-precision `fn` is applied, and the result is written back into a
// vector4float with Vstore. `pref` supplies the declaration specifiers
// placed in front of the generated function (e.g. `static inline`).
#define FLOAT_WRAP_1(fn, pref)\
  pref vector4float fn(vector4float a)\
  {\
    vector4double in_d  = Vset()(a);\
    vector4double out_d = fn(in_d);\
    vector4float  out_f;\
    \
    Vstore()(out_d, out_f);\
    \
    return out_f;\
  }
|
||||||
|
|
||||||
struct Sum{
|
struct Sum{
|
||||||
//Complex/Real float
|
|
||||||
inline float operator()(float a, float b){
|
|
||||||
#error
|
|
||||||
}
|
|
||||||
//Complex/Real double
|
//Complex/Real double
|
||||||
inline vector4double operator()(vector4double a, vector4double b){
|
inline vector4double operator()(vector4double a, vector4double b){
|
||||||
return vec_add(a,b);
|
return vec_add(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Complex/Real float
|
||||||
|
FLOAT_WRAP_2(operator(), inline)
|
||||||
|
|
||||||
//Integer
|
//Integer
|
||||||
inline int operator()(int a, int b){
|
inline int operator()(int a, int b){
|
||||||
#error
|
return a + b;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Sub{
|
struct Sub{
|
||||||
//Complex/Real float
|
|
||||||
inline float operator()(float a, float b){
|
|
||||||
#error
|
|
||||||
}
|
|
||||||
//Complex/Real double
|
//Complex/Real double
|
||||||
inline vector4double operator()(vector4double a, vector4double b){
|
inline vector4double operator()(vector4double a, vector4double b){
|
||||||
#error
|
return vec_sub(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Complex/Real float
|
||||||
|
FLOAT_WRAP_2(operator(), inline)
|
||||||
|
|
||||||
//Integer
|
//Integer
|
||||||
inline floati operator()(int a, int b){
|
inline int operator()(int a, int b){
|
||||||
#error
|
return a - b;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct MultComplex{
|
struct MultComplex{
|
||||||
// Complex float
|
|
||||||
inline float operator()(float a, float b){
|
|
||||||
#error
|
|
||||||
}
|
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vector4double operator()(vector4double a, vector4double b){
|
inline vector4double operator()(vector4double a, vector4double b){
|
||||||
#error
|
return vec_xxnpmadd(a, b, vec_xmul(b, a));
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
|
// Complex float
|
||||||
|
FLOAT_WRAP_2(operator(), inline)
|
||||||
|
};
|
||||||
|
|
||||||
struct Mult{
|
struct Mult{
|
||||||
// Real float
|
|
||||||
inline float operator()(float a, float b){
|
|
||||||
#error
|
|
||||||
}
|
|
||||||
// Real double
|
// Real double
|
||||||
inline vector4double operator()(vector4double a, vector4double b){
|
inline vector4double operator()(vector4double a, vector4double b){
|
||||||
#error
|
return vec_mul(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Real float
|
||||||
|
FLOAT_WRAP_2(operator(), inline)
|
||||||
|
|
||||||
// Integer
|
// Integer
|
||||||
inline int operator()(int a, int b){
|
inline int operator()(int a, int b){
|
||||||
#error
|
return a*b;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct Conj{
|
struct Conj{
|
||||||
// Complex single
|
|
||||||
inline float operator()(float in){
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
// Complex double
|
// Complex double
|
||||||
inline vector4double operator()(vector4double in){
|
inline vector4double operator()(vector4double v){
|
||||||
assert(0);
|
return vec_mul(v, (vector4double){1., -1., 1., -1.});
|
||||||
}
|
}
|
||||||
// do not define for integer input
|
|
||||||
};
|
|
||||||
|
|
||||||
|
// Complex float
|
||||||
|
FLOAT_WRAP_1(operator(), inline)
|
||||||
|
};
|
||||||
|
|
||||||
struct TimesMinusI{
|
struct TimesMinusI{
|
||||||
//Complex single
|
|
||||||
inline float operator()(float in, float ret){
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
//Complex double
|
//Complex double
|
||||||
inline vector4double operator()(vector4double in, vector4double ret){
|
inline vector4double operator()(vector4double v, vector4double ret){
|
||||||
assert(0);
|
return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
|
||||||
|
(vector4double){0., 0., 0., 0.});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Complex float
|
||||||
|
FLOAT_WRAP_2(operator(), inline)
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TimesI{
|
struct TimesI{
|
||||||
//Complex single
|
|
||||||
inline float operator()(float in, float ret){
|
|
||||||
|
|
||||||
}
|
|
||||||
//Complex double
|
//Complex double
|
||||||
inline vector4double operator()(vector4double in, vector4double ret){
|
inline vector4double operator()(vector4double v, vector4double ret){
|
||||||
|
return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
|
||||||
|
(vector4double){0., 0., 0., 0.});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Complex float
|
||||||
|
FLOAT_WRAP_2(operator(), inline)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct Permute{
|
||||||
|
//Complex double
|
||||||
|
static inline vector4double Permute0(vector4double v){ //0123 -> 2301
|
||||||
|
return vec_perm(v, v, vec_gpci(02301));
|
||||||
|
};
|
||||||
|
static inline vector4double Permute1(vector4double v){ //0123 -> 1032
|
||||||
|
return vec_perm(v, v, vec_gpci(01032));
|
||||||
|
};
|
||||||
|
static inline vector4double Permute2(vector4double v){
|
||||||
|
return v;
|
||||||
|
};
|
||||||
|
static inline vector4double Permute3(vector4double v){
|
||||||
|
return v;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Complex float
|
||||||
|
FLOAT_WRAP_1(Permute0, static inline)
|
||||||
|
FLOAT_WRAP_1(Permute1, static inline)
|
||||||
|
FLOAT_WRAP_1(Permute2, static inline)
|
||||||
|
FLOAT_WRAP_1(Permute3, static inline)
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Rotate{
|
||||||
|
// Cyclic lane rotation of a four-element vector by n positions:
//   n = 0 : 0123 -> 0123 (input returned as is)
//   n = 1 : 0123 -> 1230
//   n = 2 : 0123 -> 2301
//   n = 3 : 0123 -> 3012
// Any other n is a programming error and trips the assert. With assertions
// compiled out (NDEBUG) the input is returned unchanged, so control never
// falls off the end of this non-void function.
static inline vector4double rotate(vector4double v, int n){
  switch(n){
  case 0:
    return v;
  case 1:
    return vec_perm(v, v, vec_gpci(01230));
  case 2:
    return vec_perm(v, v, vec_gpci(02301));
  case 3:
    return vec_perm(v, v, vec_gpci(03012));
  default:
    assert(0);
    return v;  // only reached when assert is disabled
  }
}
|
||||||
|
|
||||||
|
static inline vector4float rotate(vector4float v, int n){
|
||||||
|
vector4double vd, rd;
|
||||||
|
vector4float r;
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
vd = Vset()(v);
|
||||||
// Some Template specialization
|
rd = rotate(vd, n);
|
||||||
|
Vstore()(rd, r);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
//Complex float Reduce
|
//Complex float Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexF Reduce<Grid::ComplexF, float>::operator()(float in){
|
inline Grid::ComplexF
|
||||||
assert(0);
|
Reduce<Grid::ComplexF, vector4float>::operator()(vector4float v) { //2 complex
|
||||||
|
vector4float v1,v2;
|
||||||
|
|
||||||
|
v1 = Optimization::Permute::Permute0(v);
|
||||||
|
v1 = Optimization::Sum()(v1, v);
|
||||||
|
|
||||||
|
return Grid::ComplexF(v1.v0, v1.v1);
|
||||||
}
|
}
|
||||||
//Real float Reduce
|
//Real float Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::RealF Reduce<Grid::RealF, float>::operator()(float in){
|
inline Grid::RealF
|
||||||
assert(0);
|
Reduce<Grid::RealF, vector4float>::operator()(vector4float v){ //4 floats
|
||||||
|
vector4float v1,v2;
|
||||||
|
|
||||||
|
v1 = Optimization::Permute::Permute0(v);
|
||||||
|
v1 = Optimization::Sum()(v1, v);
|
||||||
|
v2 = Optimization::Permute::Permute1(v1);
|
||||||
|
v1 = Optimization::Sum()(v1, v2);
|
||||||
|
|
||||||
|
return v1.v0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//Complex double Reduce
|
//Complex double Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexD Reduce<Grid::ComplexD, vector4double>::operator()(vector4double in){
|
inline Grid::ComplexD
|
||||||
assert(0);
|
Reduce<Grid::ComplexD, vector4double>::operator()(vector4double v){ //2 complex
|
||||||
|
vector4double v1;
|
||||||
|
|
||||||
|
v1 = Optimization::Permute::Permute0(v);
|
||||||
|
v1 = vec_add(v1, v);
|
||||||
|
|
||||||
|
return Grid::ComplexD(vec_extract(v1, 0), vec_extract(v1, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
//Real double Reduce
|
//Real double Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::RealD Reduce<Grid::RealD, vector4double>::operator()(vector4double in){
|
inline Grid::RealD
|
||||||
assert(0);
|
Reduce<Grid::RealD, vector4double>::operator()(vector4double v){ //4 doubles
|
||||||
}
|
vector4double v1,v2;
|
||||||
|
|
||||||
|
v1 = Optimization::Permute::Permute0(v);
|
||||||
|
v1 = vec_add(v1, v);
|
||||||
|
v2 = Optimization::Permute::Permute1(v1);
|
||||||
|
v1 = vec_add(v1, v2);
|
||||||
|
|
||||||
|
return vec_extract(v1, 0);
|
||||||
|
}
|
||||||
|
|
||||||
//Integer Reduce
|
//Integer Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Integer Reduce<Integer, floati>::operator()(float in){
|
inline Integer Reduce<Integer, int>::operator()(int in){
|
||||||
|
// FIXME unimplemented
|
||||||
|
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Here assign types
|
// Here assign types
|
||||||
namespace Grid {
|
typedef Optimization::vector4float SIMD_Ftype; // Single precision type
|
||||||
typedef float SIMD_Ftype __attribute__ ((vector_size (16))); // Single precision type
|
typedef vector4double SIMD_Dtype; // Double precision type
|
||||||
typedef vector4double SIMD_Dtype; // Double precision type
|
typedef int SIMD_Itype; // Integer type
|
||||||
typedef int SIMD_Itype; // Integer type
|
|
||||||
|
|
||||||
inline void v_prefetch0(int size, const char *ptr){};
|
// prefetch utilities
|
||||||
|
inline void v_prefetch0(int size, const char *ptr){};
|
||||||
// Function name aliases
|
inline void prefetch_HINT_T0(const char *ptr){};
|
||||||
typedef Optimization::Vsplat VsplatSIMD;
|
|
||||||
typedef Optimization::Vstore VstoreSIMD;
|
|
||||||
typedef Optimization::Vset VsetSIMD;
|
|
||||||
typedef Optimization::Vstream VstreamSIMD;
|
|
||||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
|
||||||
|
|
||||||
|
|
||||||
// Arithmetic operations
|
// Function name aliases
|
||||||
typedef Optimization::Sum SumSIMD;
|
typedef Optimization::Vsplat VsplatSIMD;
|
||||||
typedef Optimization::Sub SubSIMD;
|
typedef Optimization::Vstore VstoreSIMD;
|
||||||
typedef Optimization::Mult MultSIMD;
|
typedef Optimization::Vset VsetSIMD;
|
||||||
typedef Optimization::MultComplex MultComplexSIMD;
|
typedef Optimization::Vstream VstreamSIMD;
|
||||||
typedef Optimization::Conj ConjSIMD;
|
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
|
||||||
|
|
||||||
|
// Arithmetic operations
|
||||||
|
typedef Optimization::Sum SumSIMD;
|
||||||
|
typedef Optimization::Sub SubSIMD;
|
||||||
|
typedef Optimization::Mult MultSIMD;
|
||||||
|
typedef Optimization::MultComplex MultComplexSIMD;
|
||||||
|
typedef Optimization::Conj ConjSIMD;
|
||||||
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -388,6 +388,12 @@ class Grid_simd {
|
|||||||
|
|
||||||
}; // end of Grid_simd class definition
|
}; // end of Grid_simd class definition
|
||||||
|
|
||||||
|
|
||||||
|
// permute for a ComplexD value: y receives b unchanged; the perm argument is ignored.
inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; }
|
||||||
|
// permute for a ComplexF value: y receives b unchanged; the perm argument is ignored.
inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; }
|
||||||
|
// permute for a RealD value: y receives b unchanged; the perm argument is ignored.
inline void permute(RealD &y,RealD b, int perm) { y=b; }
|
||||||
|
// permute for a RealF value: y receives b unchanged; the perm argument is ignored.
inline void permute(RealF &y,RealF b, int perm) { y=b; }
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// General rotate
|
// General rotate
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
|
@ -67,15 +67,13 @@ template <class scalar>
|
|||||||
struct AsinRealFunctor {
|
struct AsinRealFunctor {
|
||||||
scalar operator()(const scalar &a) const { return asin(real(a)); }
|
scalar operator()(const scalar &a) const { return asin(real(a)); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class scalar>
|
template <class scalar>
|
||||||
struct LogRealFunctor {
|
struct LogRealFunctor {
|
||||||
scalar operator()(const scalar &a) const { return log(real(a)); }
|
scalar operator()(const scalar &a) const { return log(real(a)); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class scalar>
|
template <class scalar>
|
||||||
struct ExpRealFunctor {
|
struct ExpFunctor {
|
||||||
scalar operator()(const scalar &a) const { return exp(real(a)); }
|
scalar operator()(const scalar &a) const { return exp(a); }
|
||||||
};
|
};
|
||||||
template <class scalar>
|
template <class scalar>
|
||||||
struct NotFunctor {
|
struct NotFunctor {
|
||||||
@ -85,7 +83,6 @@ template <class scalar>
|
|||||||
struct AbsRealFunctor {
|
struct AbsRealFunctor {
|
||||||
scalar operator()(const scalar &a) const { return std::abs(real(a)); }
|
scalar operator()(const scalar &a) const { return std::abs(real(a)); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class scalar>
|
template <class scalar>
|
||||||
struct PowRealFunctor {
|
struct PowRealFunctor {
|
||||||
double y;
|
double y;
|
||||||
@ -135,7 +132,6 @@ template <class Scalar>
|
|||||||
inline Scalar rsqrt(const Scalar &r) {
|
inline Scalar rsqrt(const Scalar &r) {
|
||||||
return (RSqrtRealFunctor<Scalar>(), r);
|
return (RSqrtRealFunctor<Scalar>(), r);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class S, class V>
|
template <class S, class V>
|
||||||
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
|
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
|
||||||
return SimdApply(CosRealFunctor<S>(), r);
|
return SimdApply(CosRealFunctor<S>(), r);
|
||||||
@ -162,7 +158,7 @@ inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
|
|||||||
}
|
}
|
||||||
template <class S, class V>
|
template <class S, class V>
|
||||||
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
|
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
|
||||||
return SimdApply(ExpRealFunctor<S>(), r);
|
return SimdApply(ExpFunctor<S>(), r);
|
||||||
}
|
}
|
||||||
template <class S, class V>
|
template <class S, class V>
|
||||||
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {
|
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {
|
||||||
|
18
scripts/update_fftw.sh
Executable file
18
scripts/update_fftw.sh
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Refresh the bundled FFTW header.
#
# usage: update_fftw.sh <archive>
#
# Unpacks <archive> in the current directory, copies its api/fftw3.h into
# lib/fftw/, then removes the unpacked tree again.

# Stop at the first failing command or unset variable instead of carrying on
# with an empty path. (pipefail is deliberately not enabled: `head -n1` closes
# the pipe early and tar may then exit with SIGPIPE.)
set -eu

if (( $# != 1 )); then
  echo "usage: $(basename "$0") <archive>" 1>&2
  exit 1
fi
ARC=$1

INITDIR=$(pwd)

# Name of the archive's top-level directory: first listed entry, cut at the
# first '/'. Determined before anything is deleted so that an unreadable
# archive leaves the existing lib/fftw untouched.
ARCDIR=$(tar -tf "${ARC}" | head -n1 | sed -e 's@/.*@@')
if [[ -z "${ARCDIR}" || "${ARCDIR}" == "." || "${ARCDIR}" == ".." ]]; then
  echo "error: cannot determine the top-level directory of ${ARC}" 1>&2
  exit 1
fi

rm -rf lib/fftw
mkdir -p lib/fftw

tar -xf "${ARC}"
cp "${ARCDIR}/api/fftw3.h" lib/fftw/

# Clean up the unpacked sources from the directory we started in.
cd "${INITDIR}"
rm -rf "${ARCDIR}"
|
@ -157,10 +157,9 @@ void Tester(const functor &func)
|
|||||||
std::cout << GridLogMessage << " " << func.name() << std::endl;
|
std::cout << GridLogMessage << " " << func.name() << std::endl;
|
||||||
|
|
||||||
std::cout << GridLogDebug << v_input1 << std::endl;
|
std::cout << GridLogDebug << v_input1 << std::endl;
|
||||||
|
std::cout << GridLogDebug << v_input2 << std::endl;
|
||||||
std::cout << GridLogDebug << v_result << std::endl;
|
std::cout << GridLogDebug << v_result << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int ok=0;
|
int ok=0;
|
||||||
for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
if ( abs(reference[i]-result[i])>1.0e-7){
|
if ( abs(reference[i]-result[i])>1.0e-7){
|
||||||
|
111
tests/core/Test_fft.cc
Normal file
111
tests/core/Test_fft.cc
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/core/Test_fft.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
|
std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
|
||||||
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
|
int vol = 1;
|
||||||
|
for(int d=0;d<latt_size.size();d++){
|
||||||
|
vol = vol * latt_size[d];
|
||||||
|
}
|
||||||
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
LatticeComplexD one(&Fine);
|
||||||
|
LatticeComplexD zz(&Fine);
|
||||||
|
LatticeComplexD C(&Fine);
|
||||||
|
LatticeComplexD Ctilde(&Fine);
|
||||||
|
LatticeComplexD coor(&Fine);
|
||||||
|
|
||||||
|
LatticeSpinMatrixD S(&Fine);
|
||||||
|
LatticeSpinMatrixD Stilde(&Fine);
|
||||||
|
|
||||||
|
std::vector<int> p({1,2,3,2});
|
||||||
|
|
||||||
|
one = ComplexD(1.0,0.0);
|
||||||
|
zz = ComplexD(0.0,0.0);
|
||||||
|
|
||||||
|
ComplexD ci(0.0,1.0);
|
||||||
|
|
||||||
|
C=zero;
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||||
|
LatticeCoordinate(coor,mu);
|
||||||
|
C = C - (TwoPiL * p[mu]) * coor;
|
||||||
|
}
|
||||||
|
|
||||||
|
C = exp(C*ci);
|
||||||
|
|
||||||
|
S=zero;
|
||||||
|
S = S+C;
|
||||||
|
|
||||||
|
FFT theFFT(&Fine);
|
||||||
|
|
||||||
|
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Ctilde,C,3,FFT::forward); std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
|
||||||
|
// C=zero;
|
||||||
|
// Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
|
||||||
|
TComplexD cVol;
|
||||||
|
cVol()()() = vol;
|
||||||
|
|
||||||
|
C=zero;
|
||||||
|
pokeSite(cVol,C,p);
|
||||||
|
C=C-Ctilde;
|
||||||
|
std::cout << "diff scalar "<<norm2(C) << std::endl;
|
||||||
|
|
||||||
|
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
|
||||||
|
SpinMatrixD Sp;
|
||||||
|
Sp = zero; Sp = Sp+cVol;
|
||||||
|
|
||||||
|
S=zero;
|
||||||
|
pokeSite(Sp,S,p);
|
||||||
|
|
||||||
|
S= S-Stilde;
|
||||||
|
std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
111
tests/core/Test_fftf.cc
Normal file
111
tests/core/Test_fftf.cc
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/core/Test_fftf.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
|
std::vector<int> latt_size = GridDefaultLatt();
|
||||||
|
std::vector<int> simd_layout( { vComplexF::Nsimd(),1,1,1});
|
||||||
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
|
int vol = 1;
|
||||||
|
for(int d=0;d<latt_size.size();d++){
|
||||||
|
vol = vol * latt_size[d];
|
||||||
|
}
|
||||||
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
LatticeComplexF one(&Fine);
|
||||||
|
LatticeComplexF zz(&Fine);
|
||||||
|
LatticeComplexF C(&Fine);
|
||||||
|
LatticeComplexF Ctilde(&Fine);
|
||||||
|
LatticeComplexF coor(&Fine);
|
||||||
|
|
||||||
|
LatticeSpinMatrixF S(&Fine);
|
||||||
|
LatticeSpinMatrixF Stilde(&Fine);
|
||||||
|
|
||||||
|
std::vector<int> p({1,2,3,2});
|
||||||
|
|
||||||
|
one = ComplexF(1.0,0.0);
|
||||||
|
zz = ComplexF(0.0,0.0);
|
||||||
|
|
||||||
|
ComplexF ci(0.0,1.0);
|
||||||
|
|
||||||
|
C=zero;
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||||
|
LatticeCoordinate(coor,mu);
|
||||||
|
C = C - (TwoPiL * p[mu]) * coor;
|
||||||
|
}
|
||||||
|
|
||||||
|
C = exp(C*ci);
|
||||||
|
|
||||||
|
S=zero;
|
||||||
|
S = S+C;
|
||||||
|
|
||||||
|
FFT theFFT(&Fine);
|
||||||
|
|
||||||
|
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Ctilde,C,3,FFT::forward); std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
|
||||||
|
// C=zero;
|
||||||
|
// Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
|
||||||
|
TComplexF cVol;
|
||||||
|
cVol()()() = vol;
|
||||||
|
|
||||||
|
C=zero;
|
||||||
|
pokeSite(cVol,C,p);
|
||||||
|
C=C-Ctilde;
|
||||||
|
std::cout << "diff scalar "<<norm2(C) << std::endl;
|
||||||
|
|
||||||
|
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
|
||||||
|
|
||||||
|
SpinMatrixF Sp;
|
||||||
|
Sp = zero; Sp = Sp+cVol;
|
||||||
|
|
||||||
|
S=zero;
|
||||||
|
pokeSite(Sp,S,p);
|
||||||
|
|
||||||
|
S= S-Stilde;
|
||||||
|
std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -44,6 +44,7 @@ struct scal {
|
|||||||
};
|
};
|
||||||
|
|
||||||
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallVecFermionR;
|
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallVecFermionR;
|
||||||
|
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusVecFermionR;
|
||||||
typedef MobiusFermion<DomainWallVec5dImplR> MobiusVecFermionR;
|
typedef MobiusFermion<DomainWallVec5dImplR> MobiusVecFermionR;
|
||||||
typedef MobiusZolotarevFermion<DomainWallVec5dImplR> MobiusZolotarevVecFermionR;
|
typedef MobiusZolotarevFermion<DomainWallVec5dImplR> MobiusZolotarevVecFermionR;
|
||||||
typedef ScaledShamirFermion<DomainWallVec5dImplR> ScaledShamirVecFermionR;
|
typedef ScaledShamirFermion<DomainWallVec5dImplR> ScaledShamirVecFermionR;
|
||||||
@ -117,6 +118,17 @@ int main (int argc, char ** argv)
|
|||||||
TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
|
TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
|
||||||
TestWhat<MobiusVecFermionR>(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
|
TestWhat<MobiusVecFermionR>(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"Z-MobiusFermion test"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
|
||||||
|
std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
|
||||||
|
ZMobiusFermionR zDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c);
|
||||||
|
ZMobiusVecFermionR szDmob(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
|
||||||
|
TestMoo(zDmob,szDmob);
|
||||||
|
TestWhat<ZMobiusFermionR>(zDmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
|
||||||
|
TestWhat<ZMobiusVecFermionR>(szDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
|
||||||
std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
|
std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
|
||||||
|
Loading…
Reference in New Issue
Block a user