mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'feature/FFT' into develop
This commit is contained in:
commit
8a02824e08
14
configure.ac
14
configure.ac
@ -43,6 +43,7 @@ AC_ARG_WITH([mpfr],
|
||||
AC_ARG_ENABLE([lapack],
|
||||
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
||||
[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
|
||||
|
||||
case ${ac_LAPACK} in
|
||||
no)
|
||||
;;
|
||||
@ -54,6 +55,17 @@ case ${ac_LAPACK} in
|
||||
AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
|
||||
esac
|
||||
|
||||
AC_CHECK_LIB([fftw3],[fft_init_threads],
|
||||
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])] [ac_fftw=yes],
|
||||
[ac_fftw=no])
|
||||
case ${ac_fftw} in
|
||||
no)
|
||||
;;
|
||||
yes)
|
||||
AM_LDFLAGS="-lfftw3 $AM_LDFLAGS"
|
||||
esac
|
||||
|
||||
|
||||
################ Get compiler informations
|
||||
AC_LANG([C++])
|
||||
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
|
||||
@ -77,7 +89,7 @@ AC_CHECK_LIB([gmp],[__gmpf_init],
|
||||
[have_mpfr=true]
|
||||
[LIBS="$LIBS -lmpfr"],
|
||||
[AC_MSG_ERROR([MPFR library not found])])]
|
||||
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
|
||||
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
|
||||
[have_gmp=true]
|
||||
[LIBS="$LIBS -lgmp"],
|
||||
[AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
|
||||
|
193
lib/FFT.h
Normal file
193
lib/FFT.h
Normal file
@ -0,0 +1,193 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Cshift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_FFT_H_
|
||||
#define _GRID_FFT_H_
|
||||
|
||||
#include <Grid/fftw/fftw3.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
class FFT {
|
||||
private:
|
||||
|
||||
GridCartesian *vgrid;
|
||||
GridCartesian *sgrid;
|
||||
|
||||
int Nd;
|
||||
std::vector<int> dimensions;
|
||||
std::vector<int> processors;
|
||||
std::vector<int> processor_coor;
|
||||
|
||||
public:
|
||||
|
||||
static const int forward=FFTW_FORWARD;
|
||||
static const int backward=FFTW_BACKWARD;
|
||||
|
||||
FFT ( GridCartesian * grid ) :
|
||||
vgrid(grid),
|
||||
Nd(grid->_ndimension),
|
||||
dimensions(grid->_fdimensions),
|
||||
processors(grid->_processors),
|
||||
processor_coor(grid->_processor_coor)
|
||||
{
|
||||
std::vector<int> layout(Nd,1);
|
||||
sgrid = new GridCartesian(dimensions,layout,processors);
|
||||
};
|
||||
|
||||
~FFT ( void) {
|
||||
delete sgrid;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){
|
||||
|
||||
conformable(result._grid,vgrid);
|
||||
conformable(source._grid,vgrid);
|
||||
|
||||
int L = vgrid->_ldimensions[dim];
|
||||
int G = vgrid->_fdimensions[dim];
|
||||
|
||||
std::vector<int> layout(Nd,1);
|
||||
std::vector<int> pencil_gd(vgrid->_fdimensions);
|
||||
std::vector<int> pencil_ld(processors);
|
||||
|
||||
pencil_gd[dim] = G*processors[dim];
|
||||
pencil_ld[dim] = G*processors[dim];
|
||||
|
||||
// Pencil global vol LxLxGxLxL per node
|
||||
GridCartesian pencil_g(pencil_gd,layout,processors);
|
||||
GridCartesian pencil_l(pencil_ld,layout,processors);
|
||||
|
||||
// Construct pencils
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
Lattice<vobj> ssource(vgrid); ssource =source;
|
||||
Lattice<sobj> pgsource(&pencil_g);
|
||||
Lattice<sobj> pgresult(&pencil_g);
|
||||
Lattice<sobj> plsource(&pencil_l);
|
||||
Lattice<sobj> plresult(&pencil_l);
|
||||
|
||||
{
|
||||
assert(sizeof(typename sobj::scalar_type)==sizeof(ComplexD));
|
||||
assert(sizeof(fftw_complex)==sizeof(ComplexD));
|
||||
assert(sizeof(fftw_complex)==sizeof(ComplexD));
|
||||
|
||||
int Ncomp = sizeof(sobj)/sizeof(fftw_complex);
|
||||
|
||||
int rank = 1; /* not 2: we are computing 1d transforms */
|
||||
int n[] = {G}; /* 1d transforms of length G */
|
||||
int howmany = Ncomp;
|
||||
int odist,idist,istride,ostride;
|
||||
idist = odist = 1;
|
||||
istride = ostride = Ncomp; /* distance between two elements in the same column */
|
||||
int *inembed = n, *onembed = n;
|
||||
|
||||
fftw_complex *in = (fftw_complex *)&plsource._odata[0];
|
||||
fftw_complex *out= (fftw_complex *)&plresult._odata[0];
|
||||
|
||||
int sign = FFTW_FORWARD;
|
||||
if (inverse) sign = FFTW_BACKWARD;
|
||||
|
||||
#ifdef HAVE_FFTW
|
||||
fftw_plan p = fftw_plan_many_dft(rank,n,howmany,
|
||||
in,inembed,
|
||||
istride,idist,
|
||||
out,onembed,
|
||||
ostride, odist,
|
||||
sign,FFTW_ESTIMATE);
|
||||
#else
|
||||
fftw_plan p ;
|
||||
assert(0);
|
||||
#endif
|
||||
|
||||
// Barrel shift and collect global pencil
|
||||
for(int p=0;p<processors[dim];p++) {
|
||||
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
|
||||
std::vector<int> lcoor(Nd);
|
||||
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
||||
|
||||
sobj s;
|
||||
|
||||
peekLocalSite(s,ssource,lcoor);
|
||||
|
||||
lcoor[dim]+=p*L;
|
||||
|
||||
pokeLocalSite(s,pgsource,lcoor);
|
||||
}
|
||||
|
||||
ssource = Cshift(ssource,dim,L);
|
||||
}
|
||||
|
||||
// Loop over orthog coords
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
|
||||
std::vector<int> pcoor(Nd,0);
|
||||
std::vector<int> lcoor(Nd);
|
||||
sgrid->LocalIndexToLocalCoor(idx,lcoor);
|
||||
|
||||
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||
|
||||
// Project to local pencil array
|
||||
for(int l=0;l<G;l++){
|
||||
sobj s;
|
||||
pcoor[dim]=l;
|
||||
lcoor[dim]=l;
|
||||
peekLocalSite(s,pgsource,lcoor);
|
||||
pokeLocalSite(s,plsource,pcoor);
|
||||
}
|
||||
|
||||
// FFT the pencil
|
||||
#ifdef HAVE_FFTW
|
||||
fftw_execute(p);
|
||||
#endif
|
||||
|
||||
// Extract the result
|
||||
for(int l=0;l<L;l++){
|
||||
sobj s;
|
||||
int p = processor_coor[dim];
|
||||
lcoor[dim] = l;
|
||||
pcoor[dim] = l+L*p;
|
||||
peekLocalSite(s,plresult,pcoor);
|
||||
pokeLocalSite(s,result,lcoor);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
fftw_destroy_plan(p);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -68,6 +68,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/Simd.h>
|
||||
#include <Grid/Threads.h>
|
||||
#include <Grid/Lexicographic.h>
|
||||
#include <Grid/Init.h>
|
||||
#include <Grid/Communicator.h>
|
||||
#include <Grid/Cartesian.h>
|
||||
#include <Grid/Tensors.h>
|
||||
@ -78,7 +79,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/parallelIO/BinaryIO.h>
|
||||
#include <Grid/qcd/QCD.h>
|
||||
#include <Grid/parallelIO/NerscIO.h>
|
||||
#include <Grid/Init.h>
|
||||
|
||||
#include <Grid/FFT.h>
|
||||
|
||||
#include <Grid/qcd/hmc/NerscCheckpointer.h>
|
||||
#include <Grid/qcd/hmc/HmcRunner.h>
|
||||
|
412
lib/fftw/fftw3.h
Normal file
412
lib/fftw/fftw3.h
Normal file
@ -0,0 +1,412 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2007-14 Matteo Frigo
|
||||
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
|
||||
*
|
||||
* The following statement of license applies *only* to this header file,
|
||||
* and *not* to the other files distributed with FFTW or derived therefrom:
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
||||
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/***************************** NOTE TO USERS *********************************
|
||||
*
|
||||
* THIS IS A HEADER FILE, NOT A MANUAL
|
||||
*
|
||||
* If you want to know how to use FFTW, please read the manual,
|
||||
* online at http://www.fftw.org/doc/ and also included with FFTW.
|
||||
* For a quick start, see the manual's tutorial section.
|
||||
*
|
||||
* (Reading header files to learn how to use a library is a habit
|
||||
* stemming from code lacking a proper manual. Arguably, it's a
|
||||
* *bad* habit in most cases, because header files can contain
|
||||
* interfaces that are not part of the public, stable API.)
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef FFTW3_H
|
||||
#define FFTW3_H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* If <complex.h> is included, use the C99 complex type. Otherwise
|
||||
define a type bit-compatible with C99 complex */
|
||||
#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
|
||||
# define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C
|
||||
#else
|
||||
# define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2]
|
||||
#endif
|
||||
|
||||
#define FFTW_CONCAT(prefix, name) prefix ## name
|
||||
#define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name)
|
||||
#define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name)
|
||||
#define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name)
|
||||
#define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name)
|
||||
|
||||
/* IMPORTANT: for Windows compilers, you should add a line
|
||||
#define FFTW_DLL
|
||||
here and in kernel/ifftw.h if you are compiling/using FFTW as a
|
||||
DLL, in order to do the proper importing/exporting, or
|
||||
alternatively compile with -DFFTW_DLL or the equivalent
|
||||
command-line flag. This is not necessary under MinGW/Cygwin, where
|
||||
libtool does the imports/exports automatically. */
|
||||
#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__))
|
||||
/* annoying Windows syntax for shared-library declarations */
|
||||
# if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */
|
||||
# define FFTW_EXTERN extern __declspec(dllexport)
|
||||
# else /* user is calling FFTW; import symbol */
|
||||
# define FFTW_EXTERN extern __declspec(dllimport)
|
||||
# endif
|
||||
#else
|
||||
# define FFTW_EXTERN extern
|
||||
#endif
|
||||
|
||||
enum fftw_r2r_kind_do_not_use_me {
|
||||
FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2,
|
||||
FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6,
|
||||
FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10
|
||||
};
|
||||
|
||||
struct fftw_iodim_do_not_use_me {
|
||||
int n; /* dimension size */
|
||||
int is; /* input stride */
|
||||
int os; /* output stride */
|
||||
};
|
||||
|
||||
#include <stddef.h> /* for ptrdiff_t */
|
||||
struct fftw_iodim64_do_not_use_me {
|
||||
ptrdiff_t n; /* dimension size */
|
||||
ptrdiff_t is; /* input stride */
|
||||
ptrdiff_t os; /* output stride */
|
||||
};
|
||||
|
||||
typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *);
|
||||
typedef int (*fftw_read_char_func_do_not_use_me)(void *);
|
||||
|
||||
/*
|
||||
huge second-order macro that defines prototypes for all API
|
||||
functions. We expand this macro for each supported precision
|
||||
|
||||
X: name-mangling macro
|
||||
R: real data type
|
||||
C: complex data type
|
||||
*/
|
||||
|
||||
#define FFTW_DEFINE_API(X, R, C) \
|
||||
\
|
||||
FFTW_DEFINE_COMPLEX(R, C); \
|
||||
\
|
||||
typedef struct X(plan_s) *X(plan); \
|
||||
\
|
||||
typedef struct fftw_iodim_do_not_use_me X(iodim); \
|
||||
typedef struct fftw_iodim64_do_not_use_me X(iodim64); \
|
||||
\
|
||||
typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind); \
|
||||
\
|
||||
typedef fftw_write_char_func_do_not_use_me X(write_char_func); \
|
||||
typedef fftw_read_char_func_do_not_use_me X(read_char_func); \
|
||||
\
|
||||
FFTW_EXTERN void X(execute)(const X(plan) p); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n, \
|
||||
C *in, C *out, int sign, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign, \
|
||||
unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1, \
|
||||
C *in, C *out, int sign, unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2, \
|
||||
C *in, C *out, int sign, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n, \
|
||||
int howmany, \
|
||||
C *in, const int *inembed, \
|
||||
int istride, int idist, \
|
||||
C *out, const int *onembed, \
|
||||
int ostride, int odist, \
|
||||
int sign, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim) *howmany_dims, \
|
||||
C *in, C *out, \
|
||||
int sign, unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim) *howmany_dims, \
|
||||
R *ri, R *ii, R *ro, R *io, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank, \
|
||||
const X(iodim64) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim64) *howmany_dims, \
|
||||
C *in, C *out, \
|
||||
int sign, unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank, \
|
||||
const X(iodim64) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim64) *howmany_dims, \
|
||||
R *ri, R *ii, R *ro, R *io, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out); \
|
||||
FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, \
|
||||
R *ro, R *io); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n, \
|
||||
int howmany, \
|
||||
R *in, const int *inembed, \
|
||||
int istride, int idist, \
|
||||
C *out, const int *onembed, \
|
||||
int ostride, int odist, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n, \
|
||||
R *in, C *out, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1, \
|
||||
R *in, C *out, unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1, \
|
||||
int n2, \
|
||||
R *in, C *out, unsigned flags); \
|
||||
\
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n, \
|
||||
int howmany, \
|
||||
C *in, const int *inembed, \
|
||||
int istride, int idist, \
|
||||
R *out, const int *onembed, \
|
||||
int ostride, int odist, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n, \
|
||||
C *in, R *out, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1, \
|
||||
C *in, R *out, unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1, \
|
||||
int n2, \
|
||||
C *in, R *out, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim) *howmany_dims, \
|
||||
R *in, C *out, \
|
||||
unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim) *howmany_dims, \
|
||||
C *in, R *out, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)( \
|
||||
int rank, const X(iodim) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim) *howmany_dims, \
|
||||
R *in, R *ro, R *io, \
|
||||
unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)( \
|
||||
int rank, const X(iodim) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim) *howmany_dims, \
|
||||
R *ri, R *ii, R *out, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank, \
|
||||
const X(iodim64) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim64) *howmany_dims, \
|
||||
R *in, C *out, \
|
||||
unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank, \
|
||||
const X(iodim64) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim64) *howmany_dims, \
|
||||
C *in, R *out, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)( \
|
||||
int rank, const X(iodim64) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim64) *howmany_dims, \
|
||||
R *in, R *ro, R *io, \
|
||||
unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)( \
|
||||
int rank, const X(iodim64) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim64) *howmany_dims, \
|
||||
R *ri, R *ii, R *out, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out); \
|
||||
FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out); \
|
||||
\
|
||||
FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p, \
|
||||
R *in, R *ro, R *io); \
|
||||
FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p, \
|
||||
R *ri, R *ii, R *out); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n, \
|
||||
int howmany, \
|
||||
R *in, const int *inembed, \
|
||||
int istride, int idist, \
|
||||
R *out, const int *onembed, \
|
||||
int ostride, int odist, \
|
||||
const X(r2r_kind) *kind, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out, \
|
||||
const X(r2r_kind) *kind, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out, \
|
||||
X(r2r_kind) kind, unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out, \
|
||||
X(r2r_kind) kind0, X(r2r_kind) kind1, \
|
||||
unsigned flags); \
|
||||
FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2, \
|
||||
R *in, R *out, X(r2r_kind) kind0, \
|
||||
X(r2r_kind) kind1, X(r2r_kind) kind2, \
|
||||
unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim) *howmany_dims, \
|
||||
R *in, R *out, \
|
||||
const X(r2r_kind) *kind, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims, \
|
||||
int howmany_rank, \
|
||||
const X(iodim64) *howmany_dims, \
|
||||
R *in, R *out, \
|
||||
const X(r2r_kind) *kind, unsigned flags); \
|
||||
\
|
||||
FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out); \
|
||||
\
|
||||
FFTW_EXTERN void X(destroy_plan)(X(plan) p); \
|
||||
FFTW_EXTERN void X(forget_wisdom)(void); \
|
||||
FFTW_EXTERN void X(cleanup)(void); \
|
||||
\
|
||||
FFTW_EXTERN void X(set_timelimit)(double t); \
|
||||
\
|
||||
FFTW_EXTERN void X(plan_with_nthreads)(int nthreads); \
|
||||
FFTW_EXTERN int X(init_threads)(void); \
|
||||
FFTW_EXTERN void X(cleanup_threads)(void); \
|
||||
\
|
||||
FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename); \
|
||||
FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file); \
|
||||
FFTW_EXTERN char *X(export_wisdom_to_string)(void); \
|
||||
FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char, \
|
||||
void *data); \
|
||||
FFTW_EXTERN int X(import_system_wisdom)(void); \
|
||||
FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename); \
|
||||
FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file); \
|
||||
FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string); \
|
||||
FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \
|
||||
\
|
||||
FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file); \
|
||||
FFTW_EXTERN void X(print_plan)(const X(plan) p); \
|
||||
FFTW_EXTERN char *X(sprint_plan)(const X(plan) p); \
|
||||
\
|
||||
FFTW_EXTERN void *X(malloc)(size_t n); \
|
||||
FFTW_EXTERN R *X(alloc_real)(size_t n); \
|
||||
FFTW_EXTERN C *X(alloc_complex)(size_t n); \
|
||||
FFTW_EXTERN void X(free)(void *p); \
|
||||
\
|
||||
FFTW_EXTERN void X(flops)(const X(plan) p, \
|
||||
double *add, double *mul, double *fmas); \
|
||||
FFTW_EXTERN double X(estimate_cost)(const X(plan) p); \
|
||||
FFTW_EXTERN double X(cost)(const X(plan) p); \
|
||||
\
|
||||
FFTW_EXTERN int X(alignment_of)(R *p); \
|
||||
FFTW_EXTERN const char X(version)[]; \
|
||||
FFTW_EXTERN const char X(cc)[]; \
|
||||
FFTW_EXTERN const char X(codelet_optim)[];
|
||||
|
||||
|
||||
/* end of FFTW_DEFINE_API macro */
|
||||
|
||||
FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex)
|
||||
FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex)
|
||||
FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex)
|
||||
|
||||
/* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64
|
||||
for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */
|
||||
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \
|
||||
&& !(defined(__ICC) || defined(__INTEL_COMPILER)) \
|
||||
&& (defined(__i386__) || defined(__x86_64__) || defined(__ia64__))
|
||||
# if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
|
||||
/* note: __float128 is a typedef, which is not supported with the _Complex
|
||||
keyword in gcc, so instead we use this ugly __attribute__ version.
|
||||
However, we can't simply pass the __attribute__ version to
|
||||
FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer
|
||||
types. Hence redefining FFTW_DEFINE_COMPLEX. Ugh. */
|
||||
# undef FFTW_DEFINE_COMPLEX
|
||||
# define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C
|
||||
# endif
|
||||
FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex)
|
||||
#endif
|
||||
|
||||
#define FFTW_FORWARD (-1)
|
||||
#define FFTW_BACKWARD (+1)
|
||||
|
||||
#define FFTW_NO_TIMELIMIT (-1.0)
|
||||
|
||||
/* documented flags */
|
||||
#define FFTW_MEASURE (0U)
|
||||
#define FFTW_DESTROY_INPUT (1U << 0)
|
||||
#define FFTW_UNALIGNED (1U << 1)
|
||||
#define FFTW_CONSERVE_MEMORY (1U << 2)
|
||||
#define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */
|
||||
#define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */
|
||||
#define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */
|
||||
#define FFTW_ESTIMATE (1U << 6)
|
||||
#define FFTW_WISDOM_ONLY (1U << 21)
|
||||
|
||||
/* undocumented beyond-guru flags */
|
||||
#define FFTW_ESTIMATE_PATIENT (1U << 7)
|
||||
#define FFTW_BELIEVE_PCOST (1U << 8)
|
||||
#define FFTW_NO_DFT_R2HC (1U << 9)
|
||||
#define FFTW_NO_NONTHREADED (1U << 10)
|
||||
#define FFTW_NO_BUFFERING (1U << 11)
|
||||
#define FFTW_NO_INDIRECT_OP (1U << 12)
|
||||
#define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */
|
||||
#define FFTW_NO_RANK_SPLITS (1U << 14)
|
||||
#define FFTW_NO_VRANK_SPLITS (1U << 15)
|
||||
#define FFTW_NO_VRECURSE (1U << 16)
|
||||
#define FFTW_NO_SIMD (1U << 17)
|
||||
#define FFTW_NO_SLOW (1U << 18)
|
||||
#define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19)
|
||||
#define FFTW_ALLOW_PRUNING (1U << 20)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* FFTW3_H */
|
@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
|
||||
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
//PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<ig->lSites();idx++){
|
||||
std::vector<int> lcoor(ni);
|
||||
ig->LocalIndexToLocalCoor(idx,lcoor);
|
||||
@ -446,6 +446,79 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
|
||||
|
||||
}
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj s;
|
||||
|
||||
GridBase *lg = lowDim._grid;
|
||||
GridBase *hg = higherDim._grid;
|
||||
int nl = lg->_ndimension;
|
||||
int nh = hg->_ndimension;
|
||||
|
||||
assert(nl == nh);
|
||||
assert(orthog<nh);
|
||||
assert(orthog>=0);
|
||||
|
||||
for(int d=0;d<nh;d++){
|
||||
assert(lg->_processors[d] == hg->_processors[d]);
|
||||
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
//PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<lg->lSites();idx++){
|
||||
std::vector<int> lcoor(nl);
|
||||
std::vector<int> hcoor(nh);
|
||||
lg->LocalIndexToLocalCoor(idx,lcoor);
|
||||
if( lcoor[orthog] == slice_lo ) {
|
||||
hcoor=lcoor;
|
||||
hcoor[orthog] = slice_hi;
|
||||
peekLocalSite(s,lowDim,lcoor);
|
||||
pokeLocalSite(s,higherDim,hcoor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj s;
|
||||
|
||||
GridBase *lg = lowDim._grid;
|
||||
GridBase *hg = higherDim._grid;
|
||||
int nl = lg->_ndimension;
|
||||
int nh = hg->_ndimension;
|
||||
|
||||
assert(nl == nh);
|
||||
assert(orthog<nh);
|
||||
assert(orthog>=0);
|
||||
|
||||
for(int d=0;d<nh;d++){
|
||||
assert(lg->_processors[d] == hg->_processors[d]);
|
||||
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
//PARALLEL_FOR_LOOP
|
||||
for(int idx=0;idx<lg->lSites();idx++){
|
||||
std::vector<int> lcoor(nl);
|
||||
std::vector<int> hcoor(nh);
|
||||
lg->LocalIndexToLocalCoor(idx,lcoor);
|
||||
if( lcoor[orthog] == slice_lo ) {
|
||||
hcoor=lcoor;
|
||||
hcoor[orthog] = slice_hi;
|
||||
peekLocalSite(s,higherDim,hcoor);
|
||||
pokeLocalSite(s,lowDim,lcoor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
||||
{
|
||||
|
@ -388,6 +388,12 @@ class Grid_simd {
|
||||
|
||||
}; // end of Grid_simd class definition
|
||||
|
||||
|
||||
inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; }
|
||||
inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; }
|
||||
inline void permute(RealD &y,RealD b, int perm) { y=b; }
|
||||
inline void permute(RealF &y,RealF b, int perm) { y=b; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// General rotate
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
@ -67,15 +67,13 @@ template <class scalar>
|
||||
struct AsinRealFunctor {
|
||||
scalar operator()(const scalar &a) const { return asin(real(a)); }
|
||||
};
|
||||
|
||||
template <class scalar>
|
||||
struct LogRealFunctor {
|
||||
scalar operator()(const scalar &a) const { return log(real(a)); }
|
||||
};
|
||||
|
||||
template <class scalar>
|
||||
struct ExpRealFunctor {
|
||||
scalar operator()(const scalar &a) const { return exp(real(a)); }
|
||||
struct ExpFunctor {
|
||||
scalar operator()(const scalar &a) const { return exp(a); }
|
||||
};
|
||||
template <class scalar>
|
||||
struct NotFunctor {
|
||||
@ -85,7 +83,6 @@ template <class scalar>
|
||||
struct AbsRealFunctor {
|
||||
scalar operator()(const scalar &a) const { return std::abs(real(a)); }
|
||||
};
|
||||
|
||||
template <class scalar>
|
||||
struct PowRealFunctor {
|
||||
double y;
|
||||
@ -135,7 +132,6 @@ template <class Scalar>
|
||||
inline Scalar rsqrt(const Scalar &r) {
|
||||
return (RSqrtRealFunctor<Scalar>(), r);
|
||||
}
|
||||
|
||||
template <class S, class V>
|
||||
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
|
||||
return SimdApply(CosRealFunctor<S>(), r);
|
||||
@ -162,7 +158,7 @@ inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
|
||||
}
|
||||
template <class S, class V>
|
||||
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
|
||||
return SimdApply(ExpRealFunctor<S>(), r);
|
||||
return SimdApply(ExpFunctor<S>(), r);
|
||||
}
|
||||
template <class S, class V>
|
||||
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {
|
||||
|
108
tests/core/Test_fft.cc
Normal file
108
tests/core/Test_fft.cc
Normal file
@ -0,0 +1,108 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_cshift.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
int vol = 1;
|
||||
for(int d=0;d<latt_size.size();d++){
|
||||
vol = vol * latt_size[d];
|
||||
}
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
LatticeComplexD one(&Fine);
|
||||
LatticeComplexD zz(&Fine);
|
||||
LatticeComplexD C(&Fine);
|
||||
LatticeComplexD Ctilde(&Fine);
|
||||
LatticeComplexD coor(&Fine);
|
||||
|
||||
LatticeSpinMatrixD S(&Fine);
|
||||
LatticeSpinMatrixD Stilde(&Fine);
|
||||
|
||||
std::vector<int> p({1,2,3,2});
|
||||
|
||||
one = ComplexD(1.0,0.0);
|
||||
zz = ComplexD(0.0,0.0);
|
||||
|
||||
ComplexD ci(0.0,1.0);
|
||||
|
||||
C=zero;
|
||||
for(int mu=0;mu<4;mu++){
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
LatticeCoordinate(coor,mu);
|
||||
C = C - (TwoPiL * p[mu]) * coor;
|
||||
}
|
||||
|
||||
C = exp(C*ci);
|
||||
|
||||
S=zero;
|
||||
S = S+C;
|
||||
|
||||
FFT theFFT(&Fine);
|
||||
|
||||
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde;
|
||||
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde;
|
||||
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde;
|
||||
theFFT.FFT_dim(Ctilde,C,3,FFT::forward);
|
||||
|
||||
// C=zero;
|
||||
// Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
|
||||
TComplexD cVol;
|
||||
cVol()()() = vol;
|
||||
|
||||
C=zero;
|
||||
pokeSite(cVol,C,p);
|
||||
C=C-Ctilde;
|
||||
std::cout << "diff scalar "<<norm2(C) << std::endl;
|
||||
|
||||
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde;
|
||||
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;
|
||||
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;
|
||||
theFFT.FFT_dim(Stilde,S,3,FFT::forward);
|
||||
|
||||
SpinMatrixD Sp;
|
||||
Sp = zero; Sp = Sp+cVol;
|
||||
|
||||
S=zero;
|
||||
pokeSite(Sp,S,p);
|
||||
|
||||
S= S-Stilde;
|
||||
std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
Loading…
Reference in New Issue
Block a user