mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 03:54:33 +00:00 
			
		
		
		
	Merge branch 'feature/FFT' into develop
This commit is contained in:
		
							
								
								
									
										12
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -43,6 +43,7 @@ AC_ARG_WITH([mpfr], | |||||||
| AC_ARG_ENABLE([lapack], | AC_ARG_ENABLE([lapack], | ||||||
|     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],  |     [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],  | ||||||
|     [ac_LAPACK=${enable_lapack}],[ac_LAPACK=no]) |     [ac_LAPACK=${enable_lapack}],[ac_LAPACK=no]) | ||||||
|  |  | ||||||
| case ${ac_LAPACK} in | case ${ac_LAPACK} in | ||||||
|     no) |     no) | ||||||
|         ;; |         ;; | ||||||
| @@ -54,6 +55,17 @@ case ${ac_LAPACK} in | |||||||
|         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]) |         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]) | ||||||
| esac | esac | ||||||
|  |  | ||||||
|  | AC_CHECK_LIB([fftw3],[fft_init_threads], | ||||||
|  | 	[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])] [ac_fftw=yes], | ||||||
|  |         [ac_fftw=no]) | ||||||
|  | case ${ac_fftw} in | ||||||
|  |     no) | ||||||
|  |         ;; | ||||||
|  |     yes) | ||||||
|  |         AM_LDFLAGS="-lfftw3 $AM_LDFLAGS" | ||||||
|  | esac | ||||||
|  |  | ||||||
|  |  | ||||||
| ################ Get compiler informations | ################ Get compiler informations | ||||||
| AC_LANG([C++]) | AC_LANG([C++]) | ||||||
| AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) | AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) | ||||||
|   | |||||||
							
								
								
									
										193
									
								
								lib/FFT.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										193
									
								
								lib/FFT.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,193 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/Cshift.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef _GRID_FFT_H_ | ||||||
|  | #define _GRID_FFT_H_ | ||||||
|  |  | ||||||
|  | #include <Grid/fftw/fftw3.h> | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  |    | ||||||
|  |   class FFT {  | ||||||
|  |   private: | ||||||
|  |  | ||||||
|  |     GridCartesian *vgrid; | ||||||
|  |     GridCartesian *sgrid; | ||||||
|  |  | ||||||
|  |     int Nd; | ||||||
|  |     std::vector<int> dimensions; | ||||||
|  |     std::vector<int> processors; | ||||||
|  |     std::vector<int> processor_coor; | ||||||
|  |  | ||||||
|  |   public: | ||||||
|  |  | ||||||
|  |     static const int forward=FFTW_FORWARD; | ||||||
|  |     static const int backward=FFTW_BACKWARD; | ||||||
|  |  | ||||||
|  |     FFT ( GridCartesian * grid ) :  | ||||||
|  |       vgrid(grid), | ||||||
|  |       Nd(grid->_ndimension), | ||||||
|  |       dimensions(grid->_fdimensions), | ||||||
|  |       processors(grid->_processors), | ||||||
|  |       processor_coor(grid->_processor_coor) | ||||||
|  |     { | ||||||
|  |       std::vector<int> layout(Nd,1); | ||||||
|  |       sgrid = new GridCartesian(dimensions,layout,processors); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     ~FFT ( void)  {  | ||||||
|  |       delete sgrid;  | ||||||
|  |     } | ||||||
|  |      | ||||||
|  |     template<class vobj> | ||||||
|  |     void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){ | ||||||
|  |  | ||||||
|  |       conformable(result._grid,vgrid); | ||||||
|  |       conformable(source._grid,vgrid); | ||||||
|  |  | ||||||
|  |       int L = vgrid->_ldimensions[dim]; | ||||||
|  |       int G = vgrid->_fdimensions[dim]; | ||||||
|  |  | ||||||
|  |       std::vector<int> layout(Nd,1); | ||||||
|  |       std::vector<int> pencil_gd(vgrid->_fdimensions); | ||||||
|  |       std::vector<int> pencil_ld(processors); | ||||||
|  |  | ||||||
|  |       pencil_gd[dim] = G*processors[dim];     | ||||||
|  |       pencil_ld[dim] = G*processors[dim];     | ||||||
|  |  | ||||||
|  |       // Pencil global vol LxLxGxLxL per node | ||||||
|  |       GridCartesian pencil_g(pencil_gd,layout,processors); | ||||||
|  |       GridCartesian pencil_l(pencil_ld,layout,processors); | ||||||
|  |  | ||||||
|  |       // Construct pencils | ||||||
|  |       typedef typename vobj::scalar_object sobj; | ||||||
|  |       Lattice<vobj> ssource(vgrid); ssource =source; | ||||||
|  |       Lattice<sobj> pgsource(&pencil_g); | ||||||
|  |       Lattice<sobj> pgresult(&pencil_g); | ||||||
|  |       Lattice<sobj> plsource(&pencil_l); | ||||||
|  |       Lattice<sobj> plresult(&pencil_l); | ||||||
|  |  | ||||||
|  |       { | ||||||
|  | 	assert(sizeof(typename sobj::scalar_type)==sizeof(ComplexD)); | ||||||
|  | 	assert(sizeof(fftw_complex)==sizeof(ComplexD)); | ||||||
|  | 	assert(sizeof(fftw_complex)==sizeof(ComplexD)); | ||||||
|  |  | ||||||
|  | 	int Ncomp = sizeof(sobj)/sizeof(fftw_complex); | ||||||
|  |  | ||||||
|  | 	int rank = 1;  /* not 2: we are computing 1d transforms */ | ||||||
|  | 	int n[] = {G}; /* 1d transforms of length G */ | ||||||
|  | 	int howmany = Ncomp; | ||||||
|  | 	int odist,idist,istride,ostride; | ||||||
|  | 	idist   = odist   = 1; | ||||||
|  | 	istride = ostride = Ncomp; /* distance between two elements in the same column */ | ||||||
|  | 	int *inembed = n, *onembed = n; | ||||||
|  |  | ||||||
|  | 	fftw_complex *in = (fftw_complex *)&plsource._odata[0]; | ||||||
|  | 	fftw_complex *out= (fftw_complex *)&plresult._odata[0]; | ||||||
|  | 	 | ||||||
|  | 	int sign = FFTW_FORWARD; | ||||||
|  | 	if (inverse) sign = FFTW_BACKWARD; | ||||||
|  |  | ||||||
|  | #ifdef HAVE_FFTW | ||||||
|  | 	fftw_plan p = fftw_plan_many_dft(rank,n,howmany, | ||||||
|  | 					 in,inembed, | ||||||
|  | 					 istride,idist, | ||||||
|  | 					 out,onembed, | ||||||
|  | 					 ostride, odist, | ||||||
|  | 					 sign,FFTW_ESTIMATE); | ||||||
|  | #else  | ||||||
|  | 	fftw_plan p ; | ||||||
|  | 	assert(0); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | 	// Barrel shift and collect global pencil | ||||||
|  | 	for(int p=0;p<processors[dim];p++) {  | ||||||
|  |  | ||||||
|  | 	  for(int idx=0;idx<sgrid->lSites();idx++) {  | ||||||
|  |  | ||||||
|  | 	    std::vector<int> lcoor(Nd); | ||||||
|  |     	    sgrid->LocalIndexToLocalCoor(idx,lcoor); | ||||||
|  |  | ||||||
|  | 	    sobj s; | ||||||
|  |  | ||||||
|  | 	    peekLocalSite(s,ssource,lcoor); | ||||||
|  |  | ||||||
|  | 	    lcoor[dim]+=p*L; | ||||||
|  | 	    | ||||||
|  | 	    pokeLocalSite(s,pgsource,lcoor); | ||||||
|  | 	  } | ||||||
|  |  | ||||||
|  | 	  ssource = Cshift(ssource,dim,L); | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	// Loop over orthog coords | ||||||
|  | 	for(int idx=0;idx<sgrid->lSites();idx++) {  | ||||||
|  |  | ||||||
|  | 	  std::vector<int> pcoor(Nd,0); | ||||||
|  | 	  std::vector<int> lcoor(Nd); | ||||||
|  | 	  sgrid->LocalIndexToLocalCoor(idx,lcoor); | ||||||
|  |  | ||||||
|  | 	  if ( lcoor[dim] == 0 ) {  // restricts loop to plane at lcoor[dim]==0 | ||||||
|  | 	   | ||||||
|  | 	    // Project to local pencil array | ||||||
|  | 	    for(int l=0;l<G;l++){ | ||||||
|  | 	      sobj s; | ||||||
|  | 	      pcoor[dim]=l; | ||||||
|  | 	      lcoor[dim]=l; | ||||||
|  | 	      peekLocalSite(s,pgsource,lcoor); | ||||||
|  | 	      pokeLocalSite(s,plsource,pcoor); | ||||||
|  | 	    } | ||||||
|  |  | ||||||
|  | 	    // FFT the pencil | ||||||
|  | #ifdef HAVE_FFTW | ||||||
|  | 	    fftw_execute(p); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | 	    // Extract the result | ||||||
|  | 	    for(int l=0;l<L;l++){ | ||||||
|  | 	      sobj s; | ||||||
|  | 	      int p = processor_coor[dim]; | ||||||
|  | 	      lcoor[dim] = l; | ||||||
|  | 	      pcoor[dim] = l+L*p; | ||||||
|  | 	      peekLocalSite(s,plresult,pcoor); | ||||||
|  | 	      pokeLocalSite(s,result,lcoor); | ||||||
|  | 	    } | ||||||
|  |  | ||||||
|  | 	  } | ||||||
|  | 	} | ||||||
|  | 	   | ||||||
|  | 	fftw_destroy_plan(p); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #endif | ||||||
| @@ -68,6 +68,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/Simd.h> | #include <Grid/Simd.h> | ||||||
| #include <Grid/Threads.h> | #include <Grid/Threads.h> | ||||||
| #include <Grid/Lexicographic.h> | #include <Grid/Lexicographic.h> | ||||||
|  | #include <Grid/Init.h> | ||||||
| #include <Grid/Communicator.h>  | #include <Grid/Communicator.h>  | ||||||
| #include <Grid/Cartesian.h>     | #include <Grid/Cartesian.h>     | ||||||
| #include <Grid/Tensors.h>       | #include <Grid/Tensors.h>       | ||||||
| @@ -78,7 +79,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/parallelIO/BinaryIO.h> | #include <Grid/parallelIO/BinaryIO.h> | ||||||
| #include <Grid/qcd/QCD.h> | #include <Grid/qcd/QCD.h> | ||||||
| #include <Grid/parallelIO/NerscIO.h> | #include <Grid/parallelIO/NerscIO.h> | ||||||
| #include <Grid/Init.h> |  | ||||||
|  | #include <Grid/FFT.h> | ||||||
|  |  | ||||||
| #include <Grid/qcd/hmc/NerscCheckpointer.h> | #include <Grid/qcd/hmc/NerscCheckpointer.h> | ||||||
| #include <Grid/qcd/hmc/HmcRunner.h> | #include <Grid/qcd/hmc/HmcRunner.h> | ||||||
|   | |||||||
							
								
								
									
										412
									
								
								lib/fftw/fftw3.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										412
									
								
								lib/fftw/fftw3.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,412 @@ | |||||||
|  | /* | ||||||
|  |  * Copyright (c) 2003, 2007-14 Matteo Frigo | ||||||
|  |  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology | ||||||
|  |  * | ||||||
|  |  * The following statement of license applies *only* to this header file, | ||||||
|  |  * and *not* to the other files distributed with FFTW or derived therefrom: | ||||||
|  |  *  | ||||||
|  |  * Redistribution and use in source and binary forms, with or without | ||||||
|  |  * modification, are permitted provided that the following conditions | ||||||
|  |  * are met: | ||||||
|  |  * | ||||||
|  |  * 1. Redistributions of source code must retain the above copyright | ||||||
|  |  *    notice, this list of conditions and the following disclaimer. | ||||||
|  |  * | ||||||
|  |  * 2. Redistributions in binary form must reproduce the above copyright | ||||||
|  |  *    notice, this list of conditions and the following disclaimer in the | ||||||
|  |  *    documentation and/or other materials provided with the distribution. | ||||||
|  |  * | ||||||
|  |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS | ||||||
|  |  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||||||
|  |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||||
|  |  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | ||||||
|  |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||||
|  |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE | ||||||
|  |  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||||||
|  |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||||||
|  |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||||||
|  |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||||||
|  |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | /***************************** NOTE TO USERS ********************************* | ||||||
|  |  * | ||||||
|  |  *                 THIS IS A HEADER FILE, NOT A MANUAL | ||||||
|  |  * | ||||||
|  |  *    If you want to know how to use FFTW, please read the manual, | ||||||
|  |  *    online at http://www.fftw.org/doc/ and also included with FFTW. | ||||||
|  |  *    For a quick start, see the manual's tutorial section. | ||||||
|  |  * | ||||||
|  |  *   (Reading header files to learn how to use a library is a habit | ||||||
|  |  *    stemming from code lacking a proper manual.  Arguably, it's a | ||||||
|  |  *    *bad* habit in most cases, because header files can contain | ||||||
|  |  *    interfaces that are not part of the public, stable API.) | ||||||
|  |  * | ||||||
|  |  ****************************************************************************/ | ||||||
|  |  | ||||||
|  | #ifndef FFTW3_H | ||||||
|  | #define FFTW3_H | ||||||
|  |  | ||||||
|  | #include <stdio.h> | ||||||
|  |  | ||||||
|  | #ifdef __cplusplus | ||||||
|  | extern "C" | ||||||
|  | { | ||||||
|  | #endif /* __cplusplus */ | ||||||
|  |  | ||||||
|  | /* If <complex.h> is included, use the C99 complex type.  Otherwise | ||||||
|  |    define a type bit-compatible with C99 complex */ | ||||||
|  | #if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) | ||||||
|  | #  define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C | ||||||
|  | #else | ||||||
|  | #  define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2] | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #define FFTW_CONCAT(prefix, name) prefix ## name | ||||||
|  | #define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name) | ||||||
|  | #define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name) | ||||||
|  | #define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name) | ||||||
|  | #define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name) | ||||||
|  |  | ||||||
|  | /* IMPORTANT: for Windows compilers, you should add a line | ||||||
|  |         #define FFTW_DLL | ||||||
|  |    here and in kernel/ifftw.h if you are compiling/using FFTW as a | ||||||
|  |    DLL, in order to do the proper importing/exporting, or | ||||||
|  |    alternatively compile with -DFFTW_DLL or the equivalent | ||||||
|  |    command-line flag.  This is not necessary under MinGW/Cygwin, where | ||||||
|  |    libtool does the imports/exports automatically. */ | ||||||
|  | #if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__)) | ||||||
|  |    /* annoying Windows syntax for shared-library declarations */ | ||||||
|  | #  if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */ | ||||||
|  | #    define FFTW_EXTERN extern __declspec(dllexport)  | ||||||
|  | #  else /* user is calling FFTW; import symbol */ | ||||||
|  | #    define FFTW_EXTERN extern __declspec(dllimport)  | ||||||
|  | #  endif | ||||||
|  | #else | ||||||
|  | #  define FFTW_EXTERN extern | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | enum fftw_r2r_kind_do_not_use_me { | ||||||
|  |      FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2, | ||||||
|  |      FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6, | ||||||
|  |      FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10 | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | struct fftw_iodim_do_not_use_me { | ||||||
|  |      int n;                     /* dimension size */ | ||||||
|  |      int is;			/* input stride */ | ||||||
|  |      int os;			/* output stride */ | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | #include <stddef.h> /* for ptrdiff_t */ | ||||||
|  | struct fftw_iodim64_do_not_use_me { | ||||||
|  |      ptrdiff_t n;                     /* dimension size */ | ||||||
|  |      ptrdiff_t is;			/* input stride */ | ||||||
|  |      ptrdiff_t os;			/* output stride */ | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *); | ||||||
|  | typedef int (*fftw_read_char_func_do_not_use_me)(void *); | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   huge second-order macro that defines prototypes for all API | ||||||
|  |   functions.  We expand this macro for each supported precision | ||||||
|  |   | ||||||
|  |   X: name-mangling macro | ||||||
|  |   R: real data type | ||||||
|  |   C: complex data type | ||||||
|  | */ | ||||||
|  |  | ||||||
|  | #define FFTW_DEFINE_API(X, R, C)					   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_DEFINE_COMPLEX(R, C);						   \ | ||||||
|  | 									   \ | ||||||
|  | typedef struct X(plan_s) *X(plan);					   \ | ||||||
|  | 									   \ | ||||||
|  | typedef struct fftw_iodim_do_not_use_me X(iodim);			   \ | ||||||
|  | typedef struct fftw_iodim64_do_not_use_me X(iodim64);			   \ | ||||||
|  | 									   \ | ||||||
|  | typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind);			   \ | ||||||
|  | 									   \ | ||||||
|  | typedef fftw_write_char_func_do_not_use_me X(write_char_func);		   \ | ||||||
|  | typedef fftw_read_char_func_do_not_use_me X(read_char_func);		   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(execute)(const X(plan) p);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n,			   \ | ||||||
|  | 		    C *in, C *out, int sign, unsigned flags);		   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign,	   \ | ||||||
|  | 		       unsigned flags);					   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1,			   \ | ||||||
|  | 		       C *in, C *out, int sign, unsigned flags);	   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2,		   \ | ||||||
|  | 		       C *in, C *out, int sign, unsigned flags);	   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n,		   \ | ||||||
|  |                          int howmany,					   \ | ||||||
|  |                          C *in, const int *inembed,			   \ | ||||||
|  |                          int istride, int idist,			   \ | ||||||
|  |                          C *out, const int *onembed,			   \ | ||||||
|  |                          int ostride, int odist,			   \ | ||||||
|  |                          int sign, unsigned flags);			   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims,	   \ | ||||||
|  | 			 int howmany_rank,				   \ | ||||||
|  | 			 const X(iodim) *howmany_dims,			   \ | ||||||
|  | 			 C *in, C *out,					   \ | ||||||
|  | 			 int sign, unsigned flags);			   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \ | ||||||
|  | 			 int howmany_rank,				   \ | ||||||
|  | 			 const X(iodim) *howmany_dims,			   \ | ||||||
|  | 			 R *ri, R *ii, R *ro, R *io,			   \ | ||||||
|  | 			 unsigned flags);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank,			   \ | ||||||
|  |                          const X(iodim64) *dims,			   \ | ||||||
|  | 			 int howmany_rank,				   \ | ||||||
|  | 			 const X(iodim64) *howmany_dims,		   \ | ||||||
|  | 			 C *in, C *out,					   \ | ||||||
|  | 			 int sign, unsigned flags);			   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank,			   \ | ||||||
|  |                          const X(iodim64) *dims,			   \ | ||||||
|  | 			 int howmany_rank,				   \ | ||||||
|  | 			 const X(iodim64) *howmany_dims,		   \ | ||||||
|  | 			 R *ri, R *ii, R *ro, R *io,			   \ | ||||||
|  | 			 unsigned flags);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out);	   \ | ||||||
|  | FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii,	   \ | ||||||
|  |                                       R *ro, R *io);			   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n,	   \ | ||||||
|  |                              int howmany,				   \ | ||||||
|  |                              R *in, const int *inembed,			   \ | ||||||
|  |                              int istride, int idist,			   \ | ||||||
|  |                              C *out, const int *onembed,		   \ | ||||||
|  |                              int ostride, int odist,			   \ | ||||||
|  |                              unsigned flags);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n,		   \ | ||||||
|  |                         R *in, C *out, unsigned flags);			   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1,			   \ | ||||||
|  | 			   R *in, C *out, unsigned flags);		   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1,			   \ | ||||||
|  | 			   int n2,					   \ | ||||||
|  | 			   R *in, C *out, unsigned flags);		   \ | ||||||
|  | 									   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n,	   \ | ||||||
|  | 			     int howmany,				   \ | ||||||
|  | 			     C *in, const int *inembed,			   \ | ||||||
|  | 			     int istride, int idist,			   \ | ||||||
|  | 			     R *out, const int *onembed,		   \ | ||||||
|  | 			     int ostride, int odist,			   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n,		   \ | ||||||
|  |                         C *in, R *out, unsigned flags);			   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1,			   \ | ||||||
|  | 			   C *in, R *out, unsigned flags);		   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1,			   \ | ||||||
|  | 			   int n2,					   \ | ||||||
|  | 			   C *in, R *out, unsigned flags);		   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims,   \ | ||||||
|  | 			     int howmany_rank,				   \ | ||||||
|  | 			     const X(iodim) *howmany_dims,		   \ | ||||||
|  | 			     R *in, C *out,				   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims,   \ | ||||||
|  | 			     int howmany_rank,				   \ | ||||||
|  | 			     const X(iodim) *howmany_dims,		   \ | ||||||
|  | 			     C *in, R *out,				   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)(				   \ | ||||||
|  |                              int rank, const X(iodim) *dims,		   \ | ||||||
|  | 			     int howmany_rank,				   \ | ||||||
|  | 			     const X(iodim) *howmany_dims,		   \ | ||||||
|  | 			     R *in, R *ro, R *io,			   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)(				   \ | ||||||
|  |                              int rank, const X(iodim) *dims,		   \ | ||||||
|  | 			     int howmany_rank,				   \ | ||||||
|  | 			     const X(iodim) *howmany_dims,		   \ | ||||||
|  | 			     R *ri, R *ii, R *out,			   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank,			   \ | ||||||
|  |                              const X(iodim64) *dims,			   \ | ||||||
|  | 			     int howmany_rank,				   \ | ||||||
|  | 			     const X(iodim64) *howmany_dims,		   \ | ||||||
|  | 			     R *in, C *out,				   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank,			   \ | ||||||
|  |                              const X(iodim64) *dims,			   \ | ||||||
|  | 			     int howmany_rank,				   \ | ||||||
|  | 			     const X(iodim64) *howmany_dims,		   \ | ||||||
|  | 			     C *in, R *out,				   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)(			   \ | ||||||
|  |                              int rank, const X(iodim64) *dims,		   \ | ||||||
|  | 			     int howmany_rank,				   \ | ||||||
|  | 			     const X(iodim64) *howmany_dims,		   \ | ||||||
|  | 			     R *in, R *ro, R *io,			   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)(			   \ | ||||||
|  |                              int rank, const X(iodim64) *dims,		   \ | ||||||
|  | 			     int howmany_rank,				   \ | ||||||
|  | 			     const X(iodim64) *howmany_dims,		   \ | ||||||
|  | 			     R *ri, R *ii, R *out,			   \ | ||||||
|  | 			     unsigned flags);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out);	   \ | ||||||
|  | FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out);	   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p,		   \ | ||||||
|  |                                           R *in, R *ro, R *io);		   \ | ||||||
|  | FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p,		   \ | ||||||
|  |                                           R *ri, R *ii, R *out);	   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n,		   \ | ||||||
|  |                          int howmany,					   \ | ||||||
|  |                          R *in, const int *inembed,			   \ | ||||||
|  |                          int istride, int idist,			   \ | ||||||
|  |                          R *out, const int *onembed,			   \ | ||||||
|  |                          int ostride, int odist,			   \ | ||||||
|  |                          const X(r2r_kind) *kind, unsigned flags);	   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out,	   \ | ||||||
|  |                     const X(r2r_kind) *kind, unsigned flags);		   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out,		   \ | ||||||
|  |                        X(r2r_kind) kind, unsigned flags);		   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out,	   \ | ||||||
|  |                        X(r2r_kind) kind0, X(r2r_kind) kind1,		   \ | ||||||
|  |                        unsigned flags);					   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2,		   \ | ||||||
|  |                        R *in, R *out, X(r2r_kind) kind0,		   \ | ||||||
|  |                        X(r2r_kind) kind1, X(r2r_kind) kind2,		   \ | ||||||
|  |                        unsigned flags);					   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims,	   \ | ||||||
|  |                          int howmany_rank,				   \ | ||||||
|  |                          const X(iodim) *howmany_dims,			   \ | ||||||
|  |                          R *in, R *out,					   \ | ||||||
|  |                          const X(r2r_kind) *kind, unsigned flags);	   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims,   \ | ||||||
|  |                          int howmany_rank,				   \ | ||||||
|  |                          const X(iodim64) *howmany_dims,		   \ | ||||||
|  |                          R *in, R *out,					   \ | ||||||
|  |                          const X(r2r_kind) *kind, unsigned flags);	   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out);	   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(destroy_plan)(X(plan) p);				   \ | ||||||
|  | FFTW_EXTERN void X(forget_wisdom)(void);				   \ | ||||||
|  | FFTW_EXTERN void X(cleanup)(void);					   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(set_timelimit)(double t);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(plan_with_nthreads)(int nthreads);			   \ | ||||||
|  | FFTW_EXTERN int X(init_threads)(void);					   \ | ||||||
|  | FFTW_EXTERN void X(cleanup_threads)(void);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename);	   \ | ||||||
|  | FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file);		   \ | ||||||
|  | FFTW_EXTERN char *X(export_wisdom_to_string)(void);			   \ | ||||||
|  | FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char,   	   \ | ||||||
|  |                                   void *data);				   \ | ||||||
|  | FFTW_EXTERN int X(import_system_wisdom)(void);				   \ | ||||||
|  | FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename);	   \ | ||||||
|  | FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file);		   \ | ||||||
|  | FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string);	   \ | ||||||
|  | FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file);	   \ | ||||||
|  | FFTW_EXTERN void X(print_plan)(const X(plan) p);			   \ | ||||||
|  | FFTW_EXTERN char *X(sprint_plan)(const X(plan) p);			   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void *X(malloc)(size_t n);					   \ | ||||||
|  | FFTW_EXTERN R *X(alloc_real)(size_t n);					   \ | ||||||
|  | FFTW_EXTERN C *X(alloc_complex)(size_t n);				   \ | ||||||
|  | FFTW_EXTERN void X(free)(void *p);					   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN void X(flops)(const X(plan) p,				   \ | ||||||
|  |                           double *add, double *mul, double *fmas);	   \ | ||||||
|  | FFTW_EXTERN double X(estimate_cost)(const X(plan) p);			   \ | ||||||
|  | FFTW_EXTERN double X(cost)(const X(plan) p);				   \ | ||||||
|  | 									   \ | ||||||
|  | FFTW_EXTERN int X(alignment_of)(R *p);                                     \ | ||||||
|  | FFTW_EXTERN const char X(version)[];                                       \ | ||||||
|  | FFTW_EXTERN const char X(cc)[];						   \ | ||||||
|  | FFTW_EXTERN const char X(codelet_optim)[]; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* end of FFTW_DEFINE_API macro */ | ||||||
|  |  | ||||||
|  | FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex) | ||||||
|  | FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex) | ||||||
|  | FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) | ||||||
|  |  | ||||||
|  | /* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64 | ||||||
|  |    for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */ | ||||||
|  | #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \ | ||||||
|  |  && !(defined(__ICC) || defined(__INTEL_COMPILER)) \ | ||||||
|  |  && (defined(__i386__) || defined(__x86_64__) || defined(__ia64__)) | ||||||
|  | #  if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) | ||||||
|  | /* note: __float128 is a typedef, which is not supported with the _Complex | ||||||
|  |          keyword in gcc, so instead we use this ugly __attribute__ version. | ||||||
|  |          However, we can't simply pass the __attribute__ version to | ||||||
|  |          FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer | ||||||
|  |          types.  Hence redefining FFTW_DEFINE_COMPLEX.  Ugh. */ | ||||||
|  | #    undef FFTW_DEFINE_COMPLEX | ||||||
|  | #    define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C | ||||||
|  | #  endif | ||||||
|  | FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex) | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #define FFTW_FORWARD (-1) | ||||||
|  | #define FFTW_BACKWARD (+1) | ||||||
|  |  | ||||||
|  | #define FFTW_NO_TIMELIMIT (-1.0) | ||||||
|  |  | ||||||
|  | /* documented flags */ | ||||||
|  | #define FFTW_MEASURE (0U) | ||||||
|  | #define FFTW_DESTROY_INPUT (1U << 0) | ||||||
|  | #define FFTW_UNALIGNED (1U << 1) | ||||||
|  | #define FFTW_CONSERVE_MEMORY (1U << 2) | ||||||
|  | #define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */ | ||||||
|  | #define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */ | ||||||
|  | #define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */ | ||||||
|  | #define FFTW_ESTIMATE (1U << 6) | ||||||
|  | #define FFTW_WISDOM_ONLY (1U << 21) | ||||||
|  |  | ||||||
|  | /* undocumented beyond-guru flags */ | ||||||
|  | #define FFTW_ESTIMATE_PATIENT (1U << 7) | ||||||
|  | #define FFTW_BELIEVE_PCOST (1U << 8) | ||||||
|  | #define FFTW_NO_DFT_R2HC (1U << 9) | ||||||
|  | #define FFTW_NO_NONTHREADED (1U << 10) | ||||||
|  | #define FFTW_NO_BUFFERING (1U << 11) | ||||||
|  | #define FFTW_NO_INDIRECT_OP (1U << 12) | ||||||
|  | #define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */ | ||||||
|  | #define FFTW_NO_RANK_SPLITS (1U << 14) | ||||||
|  | #define FFTW_NO_VRANK_SPLITS (1U << 15) | ||||||
|  | #define FFTW_NO_VRECURSE (1U << 16) | ||||||
|  | #define FFTW_NO_SIMD (1U << 17) | ||||||
|  | #define FFTW_NO_SLOW (1U << 18) | ||||||
|  | #define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19) | ||||||
|  | #define FFTW_ALLOW_PRUNING (1U << 20) | ||||||
|  |  | ||||||
|  | #ifdef __cplusplus | ||||||
|  | }  /* extern "C" */ | ||||||
|  | #endif /* __cplusplus */ | ||||||
|  |  | ||||||
|  | #endif /* FFTW3_H */ | ||||||
| @@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out) | |||||||
|     assert(ig->_ldimensions[d] == og->_ldimensions[d]); |     assert(ig->_ldimensions[d] == og->_ldimensions[d]); | ||||||
|   } |   } | ||||||
|  |  | ||||||
| PARALLEL_FOR_LOOP |   //PARALLEL_FOR_LOOP | ||||||
|   for(int idx=0;idx<ig->lSites();idx++){ |   for(int idx=0;idx<ig->lSites();idx++){ | ||||||
|     std::vector<int> lcoor(ni); |     std::vector<int> lcoor(ni); | ||||||
|     ig->LocalIndexToLocalCoor(idx,lcoor); |     ig->LocalIndexToLocalCoor(idx,lcoor); | ||||||
| @@ -446,6 +446,79 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in | |||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | ||||||
|  | { | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   sobj s; | ||||||
|  |  | ||||||
|  |   GridBase *lg = lowDim._grid; | ||||||
|  |   GridBase *hg = higherDim._grid; | ||||||
|  |   int nl = lg->_ndimension; | ||||||
|  |   int nh = hg->_ndimension; | ||||||
|  |  | ||||||
|  |   assert(nl == nh); | ||||||
|  |   assert(orthog<nh); | ||||||
|  |   assert(orthog>=0); | ||||||
|  |  | ||||||
|  |   for(int d=0;d<nh;d++){ | ||||||
|  |     assert(lg->_processors[d]  == hg->_processors[d]); | ||||||
|  |     assert(lg->_ldimensions[d] == hg->_ldimensions[d]); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // the above should guarantee that the operations are local | ||||||
|  |   //PARALLEL_FOR_LOOP | ||||||
|  |   for(int idx=0;idx<lg->lSites();idx++){ | ||||||
|  |     std::vector<int> lcoor(nl); | ||||||
|  |     std::vector<int> hcoor(nh); | ||||||
|  |     lg->LocalIndexToLocalCoor(idx,lcoor); | ||||||
|  |     if( lcoor[orthog] == slice_lo ) {  | ||||||
|  |       hcoor=lcoor; | ||||||
|  |       hcoor[orthog] = slice_hi; | ||||||
|  |       peekLocalSite(s,lowDim,lcoor); | ||||||
|  |       pokeLocalSite(s,higherDim,hcoor); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | ||||||
|  | { | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   sobj s; | ||||||
|  |  | ||||||
|  |   GridBase *lg = lowDim._grid; | ||||||
|  |   GridBase *hg = higherDim._grid; | ||||||
|  |   int nl = lg->_ndimension; | ||||||
|  |   int nh = hg->_ndimension; | ||||||
|  |  | ||||||
|  |   assert(nl == nh); | ||||||
|  |   assert(orthog<nh); | ||||||
|  |   assert(orthog>=0); | ||||||
|  |  | ||||||
|  |   for(int d=0;d<nh;d++){ | ||||||
|  |     assert(lg->_processors[d]  == hg->_processors[d]); | ||||||
|  |     assert(lg->_ldimensions[d] == hg->_ldimensions[d]); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // the above should guarantee that the operations are local | ||||||
|  |   //PARALLEL_FOR_LOOP | ||||||
|  |   for(int idx=0;idx<lg->lSites();idx++){ | ||||||
|  |     std::vector<int> lcoor(nl); | ||||||
|  |     std::vector<int> hcoor(nh); | ||||||
|  |     lg->LocalIndexToLocalCoor(idx,lcoor); | ||||||
|  |     if( lcoor[orthog] == slice_lo ) {  | ||||||
|  |       hcoor=lcoor; | ||||||
|  |       hcoor[orthog] = slice_hi; | ||||||
|  |       peekLocalSite(s,higherDim,hcoor); | ||||||
|  |       pokeLocalSite(s,lowDim,lcoor); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||||
| { | { | ||||||
|   | |||||||
| @@ -388,6 +388,12 @@ class Grid_simd { | |||||||
|  |  | ||||||
| };  // end of Grid_simd class definition | };  // end of Grid_simd class definition | ||||||
|  |  | ||||||
|  |  | ||||||
|  | inline void permute(ComplexD &y,ComplexD b, int perm) {  y=b; } | ||||||
|  | inline void permute(ComplexF &y,ComplexF b, int perm) {  y=b; } | ||||||
|  | inline void permute(RealD &y,RealD b, int perm) {  y=b; } | ||||||
|  | inline void permute(RealF &y,RealF b, int perm) {  y=b; } | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////// | ||||||
| // General rotate | // General rotate | ||||||
| //////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -67,15 +67,13 @@ template <class scalar> | |||||||
| struct AsinRealFunctor { | struct AsinRealFunctor { | ||||||
|   scalar operator()(const scalar &a) const { return asin(real(a)); } |   scalar operator()(const scalar &a) const { return asin(real(a)); } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <class scalar> | template <class scalar> | ||||||
| struct LogRealFunctor { | struct LogRealFunctor { | ||||||
|   scalar operator()(const scalar &a) const { return log(real(a)); } |   scalar operator()(const scalar &a) const { return log(real(a)); } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <class scalar> | template <class scalar> | ||||||
| struct ExpRealFunctor { | struct ExpFunctor { | ||||||
|   scalar operator()(const scalar &a) const { return exp(real(a)); } |   scalar operator()(const scalar &a) const { return exp(a); } | ||||||
| }; | }; | ||||||
| template <class scalar> | template <class scalar> | ||||||
| struct NotFunctor { | struct NotFunctor { | ||||||
| @@ -85,7 +83,6 @@ template <class scalar> | |||||||
| struct AbsRealFunctor { | struct AbsRealFunctor { | ||||||
|   scalar operator()(const scalar &a) const { return std::abs(real(a)); } |   scalar operator()(const scalar &a) const { return std::abs(real(a)); } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <class scalar> | template <class scalar> | ||||||
| struct PowRealFunctor { | struct PowRealFunctor { | ||||||
|   double y; |   double y; | ||||||
| @@ -135,7 +132,6 @@ template <class Scalar> | |||||||
| inline Scalar rsqrt(const Scalar &r) { | inline Scalar rsqrt(const Scalar &r) { | ||||||
|   return (RSqrtRealFunctor<Scalar>(), r); |   return (RSqrtRealFunctor<Scalar>(), r); | ||||||
| } | } | ||||||
|  |  | ||||||
| template <class S, class V> | template <class S, class V> | ||||||
| inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) { | inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) { | ||||||
|   return SimdApply(CosRealFunctor<S>(), r); |   return SimdApply(CosRealFunctor<S>(), r); | ||||||
| @@ -162,7 +158,7 @@ inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) { | |||||||
| } | } | ||||||
| template <class S, class V> | template <class S, class V> | ||||||
| inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) { | inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) { | ||||||
|   return SimdApply(ExpRealFunctor<S>(), r); |   return SimdApply(ExpFunctor<S>(), r); | ||||||
| } | } | ||||||
| template <class S, class V> | template <class S, class V> | ||||||
| inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) { | inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) { | ||||||
|   | |||||||
							
								
								
									
										108
									
								
								tests/core/Test_fft.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								tests/core/Test_fft.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,108 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./tests/Test_cshift.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Grid::QCD; | ||||||
|  |  | ||||||
|  | int main (int argc, char ** argv) | ||||||
|  | { | ||||||
|  |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |   std::vector<int> latt_size   = GridDefaultLatt(); | ||||||
|  |   std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1}); | ||||||
|  |   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||||
|  |  | ||||||
|  |   int vol = 1; | ||||||
|  |   for(int d=0;d<latt_size.size();d++){ | ||||||
|  |     vol = vol * latt_size[d]; | ||||||
|  |   } | ||||||
|  |   GridCartesian        Fine(latt_size,simd_layout,mpi_layout); | ||||||
|  |  | ||||||
|  |   LatticeComplexD     one(&Fine); | ||||||
|  |   LatticeComplexD      zz(&Fine); | ||||||
|  |   LatticeComplexD       C(&Fine); | ||||||
|  |   LatticeComplexD  Ctilde(&Fine); | ||||||
|  |   LatticeComplexD    coor(&Fine); | ||||||
|  |  | ||||||
|  |   LatticeSpinMatrixD    S(&Fine); | ||||||
|  |   LatticeSpinMatrixD    Stilde(&Fine); | ||||||
|  |    | ||||||
|  |   std::vector<int> p({1,2,3,2}); | ||||||
|  |  | ||||||
|  |   one = ComplexD(1.0,0.0); | ||||||
|  |   zz  = ComplexD(0.0,0.0); | ||||||
|  |  | ||||||
|  |   ComplexD ci(0.0,1.0); | ||||||
|  |  | ||||||
|  |   C=zero; | ||||||
|  |   for(int mu=0;mu<4;mu++){ | ||||||
|  |     RealD TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||||
|  |     LatticeCoordinate(coor,mu); | ||||||
|  |     C = C - (TwoPiL * p[mu]) * coor; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   C = exp(C*ci); | ||||||
|  |  | ||||||
|  |   S=zero; | ||||||
|  |   S = S+C; | ||||||
|  |  | ||||||
|  |   FFT theFFT(&Fine); | ||||||
|  |  | ||||||
|  |   theFFT.FFT_dim(Ctilde,C,0,FFT::forward);  C=Ctilde; | ||||||
|  |   theFFT.FFT_dim(Ctilde,C,1,FFT::forward);  C=Ctilde; | ||||||
|  |   theFFT.FFT_dim(Ctilde,C,2,FFT::forward);  C=Ctilde; | ||||||
|  |   theFFT.FFT_dim(Ctilde,C,3,FFT::forward); | ||||||
|  |  | ||||||
|  |   //  C=zero; | ||||||
|  |   //  Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde); | ||||||
|  |   TComplexD cVol; | ||||||
|  |   cVol()()() = vol; | ||||||
|  |  | ||||||
|  |   C=zero; | ||||||
|  |   pokeSite(cVol,C,p); | ||||||
|  |   C=C-Ctilde; | ||||||
|  |   std::cout << "diff scalar "<<norm2(C) << std::endl; | ||||||
|  |  | ||||||
|  |   theFFT.FFT_dim(Stilde,S,0,FFT::forward);  S=Stilde; | ||||||
|  |   theFFT.FFT_dim(Stilde,S,1,FFT::forward);  S=Stilde; | ||||||
|  |   theFFT.FFT_dim(Stilde,S,2,FFT::forward);  S=Stilde; | ||||||
|  |   theFFT.FFT_dim(Stilde,S,3,FFT::forward); | ||||||
|  |  | ||||||
|  |   SpinMatrixD Sp;  | ||||||
|  |   Sp = zero; Sp = Sp+cVol; | ||||||
|  |  | ||||||
|  |   S=zero; | ||||||
|  |   pokeSite(Sp,S,p); | ||||||
|  |  | ||||||
|  |   S= S-Stilde; | ||||||
|  |   std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl; | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user