mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-01 04:24:32 +00:00 
			
		
		
		
	Compare commits
	
		
			9 Commits
		
	
	
		
			feature/no
			...
			bugfix/dmi
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | d0c2c9c71f | ||
|  | c8cafa77ca | ||
|  | a3bcad3804 | ||
|  | 5a5b66292b | ||
|  | e63be32ad2 | ||
|  | 6aa106d906 | ||
|  | 33d59c8869 | ||
|  | a833fd8dbf | ||
|  | e9712bc7fb | 
							
								
								
									
										13
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -7,7 +7,7 @@ cache: | |||||||
| matrix: | matrix: | ||||||
|   include: |   include: | ||||||
|     - os:        osx |     - os:        osx | ||||||
|       osx_image: xcode8.3 |       osx_image: xcode7.2 | ||||||
|       compiler: clang |       compiler: clang | ||||||
|     - compiler: gcc |     - compiler: gcc | ||||||
|       addons: |       addons: | ||||||
| @@ -73,6 +73,8 @@ before_install: | |||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi | ||||||
|  |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi | ||||||
|      |      | ||||||
| install: | install: | ||||||
|     - export CC=$CC$VERSION |     - export CC=$CC$VERSION | ||||||
| @@ -90,14 +92,15 @@ script: | |||||||
|     - cd build |     - cd build | ||||||
|     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4  |     - make -j4  | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals |     - ./benchmarks/Benchmark_dwf --threads 1 | ||||||
|     - echo make clean |     - echo make clean | ||||||
|     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none |     - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none | ||||||
|     - make -j4 |     - make -j4 | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals |     - ./benchmarks/Benchmark_dwf --threads 1 | ||||||
|     - echo make clean |     - echo make clean | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make -j4; fi |     - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto | ||||||
|  |     - make -j4 | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi |     - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										63
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										63
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,28 +1,6 @@ | |||||||
| TODO: | TODO: | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
| Peter's work list: |  | ||||||
|  |  | ||||||
| -- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started |  | ||||||
| -- Merge high precision reduction into develop         <-- done |  | ||||||
| -- Precision conversion and sort out localConvert      <--  |  | ||||||
| -- Physical propagator interface |  | ||||||
|  |  | ||||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination |  | ||||||
|    -- slice* linalg routines for multiRHS, BlockCG        <-- started |  | ||||||
|  |  | ||||||
| -- Profile CG, BlockCG, etc... Flop count/rate |  | ||||||
| -- Binary I/O speed up & x-strips |  | ||||||
| -- Half-precision comms                                <-- started |  | ||||||
| -- GaugeFix into central location |  | ||||||
| -- FFTfix in sensible place |  | ||||||
| -- Multigrid Wilson and DWF, compare to other Multigrid implementations |  | ||||||
| -- quaternions                 -- Might not need |  | ||||||
|  |  | ||||||
|  |  | ||||||
| -- Conserved currents |  | ||||||
|  |  | ||||||
| ----- |  | ||||||
| * Forces; the UdSdU  term in gauge force term is half of what I think it should | * Forces; the UdSdU  term in gauge force term is half of what I think it should | ||||||
|   be. This is a consequence of taking ONLY the first term in: |   be. This is a consequence of taking ONLY the first term in: | ||||||
|  |  | ||||||
| @@ -43,8 +21,16 @@ Peter's work list: | |||||||
|   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. |   This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. | ||||||
|   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. |   This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Policies: | ||||||
|  |  | ||||||
|  | * Link smearing/boundary conds; Policy class based implementation ; framework more in place | ||||||
|  |  | ||||||
| * Support different boundary conditions (finite temp, chem. potential ... ) | * Support different boundary conditions (finite temp, chem. potential ... ) | ||||||
|  |  | ||||||
|  | * Support different fermion representations?  | ||||||
|  |   - contained entirely within the integrator presently | ||||||
|  |  | ||||||
| - Sign of force term. | - Sign of force term. | ||||||
|  |  | ||||||
| - Reversibility test. | - Reversibility test. | ||||||
| @@ -55,6 +41,11 @@ Peter's work list: | |||||||
|  |  | ||||||
| - Audit oIndex usage for cb behaviour | - Audit oIndex usage for cb behaviour | ||||||
|  |  | ||||||
|  | - Rectangle gauge actions. | ||||||
|  |   Iwasaki, | ||||||
|  |   Symanzik, | ||||||
|  |   ... etc... | ||||||
|  |  | ||||||
| - Prepare multigrid for HMC. - Alternate setup schemes. | - Prepare multigrid for HMC. - Alternate setup schemes. | ||||||
|  |  | ||||||
| - Support for ILDG --- ugly, not done | - Support for ILDG --- ugly, not done | ||||||
| @@ -64,11 +55,9 @@ Peter's work list: | |||||||
| - FFTnD ? | - FFTnD ? | ||||||
|  |  | ||||||
| - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | - Gparity; hand opt use template specialisation elegance to enable the optimised paths ? | ||||||
|  |  | ||||||
| - Gparity force term; Gparity (R)HMC. | - Gparity force term; Gparity (R)HMC. | ||||||
|  | - Random number state save restore | ||||||
| - Mobius implementation clean up to rmove #if 0 stale code sequences | - Mobius implementation clean up to rmove #if 0 stale code sequences | ||||||
|  |  | ||||||
| - CG -- profile carefully, kernel fusion, whole CG performance measurements. | - CG -- profile carefully, kernel fusion, whole CG performance measurements. | ||||||
|  |  | ||||||
| ================================================================ | ================================================================ | ||||||
| @@ -101,7 +90,6 @@ Insert/Extract | |||||||
| Not sure of status of this -- reverify. Things are working nicely now though. | Not sure of status of this -- reverify. Things are working nicely now though. | ||||||
|  |  | ||||||
| * Make the Tensor types and Complex etc... play more nicely. | * Make the Tensor types and Complex etc... play more nicely. | ||||||
|  |  | ||||||
|   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > |   - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > | ||||||
|     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I |     QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I | ||||||
|     want to introduce a syntax that does not require this. |     want to introduce a syntax that does not require this. | ||||||
| @@ -124,8 +112,6 @@ Not sure of status of this -- reverify. Things are working nicely now though. | |||||||
| RECENT | RECENT | ||||||
| --------------- | --------------- | ||||||
|  |  | ||||||
|   - Support different fermion representations? -- DONE |  | ||||||
|   - contained entirely within the integrator presently |  | ||||||
|   - Clean up HMC                                                             -- DONE |   - Clean up HMC                                                             -- DONE | ||||||
|   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE |   - LorentzScalar<GaugeField> gets Gauge link type (cleaner).                -- DONE | ||||||
|   - Simplified the integrators a bit.                                        -- DONE |   - Simplified the integrators a bit.                                        -- DONE | ||||||
| @@ -137,26 +123,6 @@ RECENT | |||||||
|   - Parallel io improvements                                  -- DONE |   - Parallel io improvements                                  -- DONE | ||||||
|   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE |   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE | ||||||
|  |  | ||||||
|  |  | ||||||
| DONE: |  | ||||||
| - MultiArray -- MultiRHS done |  | ||||||
| - ConjugateGradientMultiShift -- DONE |  | ||||||
| - MCR                         -- DONE |  | ||||||
| - Remez -- Mike or Boost?     -- DONE |  | ||||||
| - Proto (ET)                  -- DONE |  | ||||||
| - uBlas                       -- DONE ; Eigen |  | ||||||
| - Potentially Useful Boost libraries -- DONE ; Eigen |  | ||||||
| - Aligned allocator; memory pool -- DONE |  | ||||||
| - Multiprecision              -- DONE |  | ||||||
| - Serialization               -- DONE |  | ||||||
| - Regex -- Not needed |  | ||||||
| - Tokenize -- Why? |  | ||||||
|  |  | ||||||
| - Random number state save restore -- DONE |  | ||||||
| - Rectangle gauge actions. -- DONE |  | ||||||
|   Iwasaki, |  | ||||||
|   Symanzik, |  | ||||||
|   ... etc... |  | ||||||
| Done: Cayley, Partial , ContFrac force terms. | Done: Cayley, Partial , ContFrac force terms. | ||||||
|  |  | ||||||
| DONE | DONE | ||||||
| @@ -241,7 +207,6 @@ Done | |||||||
| FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) | ||||||
| ====================================================================================================== | ====================================================================================================== | ||||||
|  |  | ||||||
| * Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE |  | ||||||
| * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE | ||||||
|   user pass these? Is this a QCD specific? |   user pass these? Is this a QCD specific? | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										21
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								configure.ac
									
									
									
									
									
								
							| @@ -83,19 +83,6 @@ case ${ac_LAPACK} in | |||||||
|         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; |         AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; | ||||||
| esac | esac | ||||||
|  |  | ||||||
| ############### FP16 conversions |  | ||||||
| AC_ARG_ENABLE([fp16], |  | ||||||
|     [AC_HELP_STRING([--enable-fp16=yes|no], [enable fp16 comms])],  |  | ||||||
|     [ac_FP16=${enable_fp16}], [ac_FP16=no]) |  | ||||||
| case ${ac_FP16} in |  | ||||||
|     no) |  | ||||||
|         ;; |  | ||||||
|     yes) |  | ||||||
|         AC_DEFINE([USE_FP16],[1],[conversion to fp16]);; |  | ||||||
|     *) |  | ||||||
| 	;; |  | ||||||
| esac |  | ||||||
|  |  | ||||||
| ############### MKL | ############### MKL | ||||||
| AC_ARG_ENABLE([mkl], | AC_ARG_ENABLE([mkl], | ||||||
|     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], |     [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], | ||||||
| @@ -192,16 +179,16 @@ case ${ax_cv_cxx_compiler_vendor} in | |||||||
|         SIMD_FLAGS='-msse4.2';; |         SIMD_FLAGS='-msse4.2';; | ||||||
|       AVX) |       AVX) | ||||||
|         AC_DEFINE([AVX1],[1],[AVX intrinsics]) |         AC_DEFINE([AVX1],[1],[AVX intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx -mf16c';; |         SIMD_FLAGS='-mavx';; | ||||||
|       AVXFMA4) |       AVXFMA4) | ||||||
|         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) |         AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) | ||||||
|         SIMD_FLAGS='-mavx -mfma4 -mf16c';; |         SIMD_FLAGS='-mavx -mfma4';; | ||||||
|       AVXFMA) |       AVXFMA) | ||||||
|         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) |         AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) | ||||||
|         SIMD_FLAGS='-mavx -mfma -mf16c';; |         SIMD_FLAGS='-mavx -mfma';; | ||||||
|       AVX2) |       AVX2) | ||||||
|         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) |         AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx2 -mfma -mf16c';; |         SIMD_FLAGS='-mavx2 -mfma';; | ||||||
|       AVX512) |       AVX512) | ||||||
|         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) |         AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) | ||||||
|         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; |         SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; | ||||||
|   | |||||||
| @@ -46,7 +46,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||||
|  |  | ||||||
| // Lanczos support | // Lanczos support | ||||||
| //#include <Grid/algorithms/iterative/MatrixUtils.h> | #include <Grid/algorithms/iterative/MatrixUtils.h> | ||||||
| #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> | ||||||
| #include <Grid/algorithms/CoarsenedMatrix.h> | #include <Grid/algorithms/CoarsenedMatrix.h> | ||||||
| #include <Grid/algorithms/FFT.h> | #include <Grid/algorithms/FFT.h> | ||||||
|   | |||||||
							
								
								
									
										0
									
								
								lib/algorithms/approx/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/algorithms/approx/.dirstamp
									
									
									
									
									
										Normal file
									
								
							| @@ -1,366 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H |  | ||||||
| #define GRID_BLOCK_CONJUGATE_GRADIENT_H |  | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| // Block conjugate gradient. Dimension zero should be the block direction |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| template <class Field> |  | ||||||
| class BlockConjugateGradient : public OperatorFunction<Field> { |  | ||||||
|  public: |  | ||||||
|  |  | ||||||
|   typedef typename Field::scalar_type scomplex; |  | ||||||
|  |  | ||||||
|   const int blockDim = 0; |  | ||||||
|  |  | ||||||
|   int Nblock; |  | ||||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. |  | ||||||
|                            // Defaults true. |  | ||||||
|   RealD Tolerance; |  | ||||||
|   Integer MaxIterations; |  | ||||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion |  | ||||||
|    |  | ||||||
|   BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) |  | ||||||
|     : Tolerance(tol), |  | ||||||
|     MaxIterations(maxit), |  | ||||||
|     ErrorOnNoConverge(err_on_no_conv){}; |  | ||||||
|  |  | ||||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  |  | ||||||
| { |  | ||||||
|   int Orthog = 0; // First dimension is block dim |  | ||||||
|   Nblock = Src._grid->_fdimensions[Orthog]; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |  | ||||||
|  |  | ||||||
|   Psi.checkerboard = Src.checkerboard; |  | ||||||
|   conformable(Psi, Src); |  | ||||||
|  |  | ||||||
|   Field P(Src); |  | ||||||
|   Field AP(Src); |  | ||||||
|   Field R(Src); |  | ||||||
|    |  | ||||||
|   Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   Eigen::MatrixXcd m_alpha      = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|   Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|  |  | ||||||
|   // Initial residual computation & set up |  | ||||||
|   std::vector<RealD> residuals(Nblock); |  | ||||||
|   std::vector<RealD> ssq(Nblock); |  | ||||||
|  |  | ||||||
|   sliceNorm(ssq,Src,Orthog); |  | ||||||
|   RealD sssum=0; |  | ||||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,Src,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,Psi,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   // Initial search dir is guess |  | ||||||
|   Linop.HermOp(Psi, AP); |  | ||||||
|    |  | ||||||
|  |  | ||||||
|   /************************************************************************ |  | ||||||
|    * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980) |  | ||||||
|    ************************************************************************ |  | ||||||
|    * O'Leary : R = B - A X |  | ||||||
|    * O'Leary : P = M R ; preconditioner M = 1 |  | ||||||
|    * O'Leary : alpha = PAP^{-1} RMR |  | ||||||
|    * O'Leary : beta  = RMR^{-1}_old RMR_new |  | ||||||
|    * O'Leary : X=X+Palpha |  | ||||||
|    * O'Leary : R_new=R_old-AP alpha |  | ||||||
|    * O'Leary : P=MR_new+P beta |  | ||||||
|    */ |  | ||||||
|  |  | ||||||
|   R = Src - AP;   |  | ||||||
|   P = R; |  | ||||||
|   sliceInnerProductMatrix(m_rr,R,R,Orthog); |  | ||||||
|  |  | ||||||
|   GridStopWatch sliceInnerTimer; |  | ||||||
|   GridStopWatch sliceMaddTimer; |  | ||||||
|   GridStopWatch MatrixTimer; |  | ||||||
|   GridStopWatch SolverTimer; |  | ||||||
|   SolverTimer.Start(); |  | ||||||
|  |  | ||||||
|   int k; |  | ||||||
|   for (k = 1; k <= MaxIterations; k++) { |  | ||||||
|  |  | ||||||
|     RealD rrsum=0; |  | ||||||
|     for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b)); |  | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum |  | ||||||
| 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; |  | ||||||
|  |  | ||||||
|     MatrixTimer.Start(); |  | ||||||
|     Linop.HermOp(P, AP); |  | ||||||
|     MatrixTimer.Stop(); |  | ||||||
|  |  | ||||||
|     // Alpha |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductMatrix(m_pAp,P,AP,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     m_pAp_inv = m_pAp.inverse(); |  | ||||||
|     m_alpha   = m_pAp_inv * m_rr ; |  | ||||||
|  |  | ||||||
|     // Psi, R update |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi |  | ||||||
|     sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     // Beta |  | ||||||
|     m_rr_inv = m_rr.inverse(); |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductMatrix(m_rr,R,R,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     m_beta = m_rr_inv *m_rr; |  | ||||||
|  |  | ||||||
|     // Search update |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddMatrix(AP,m_beta,P,R,Orthog); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|     P= AP; |  | ||||||
|  |  | ||||||
|     /********************* |  | ||||||
|      * convergence monitor |  | ||||||
|      ********************* |  | ||||||
|      */ |  | ||||||
|     RealD max_resid=0; |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       RealD rr = real(m_rr(b,b))/ssq[b]; |  | ||||||
|       if ( rr > max_resid ) max_resid = rr; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     if ( max_resid < Tolerance*Tolerance ) {  |  | ||||||
|  |  | ||||||
|       SolverTimer.Stop(); |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; |  | ||||||
|       for(int b=0;b<Nblock;b++){ |  | ||||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; |  | ||||||
|       } |  | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |  | ||||||
|  |  | ||||||
|       Linop.HermOp(Psi, AP); |  | ||||||
|       AP = AP-Src; |  | ||||||
|       std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; |  | ||||||
| 	     |  | ||||||
|       IterationsToComplete = k; |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl; |  | ||||||
|  |  | ||||||
|   if (ErrorOnNoConverge) assert(0); |  | ||||||
|   IterationsToComplete = k; |  | ||||||
| } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| // multiRHS conjugate gradient. Dimension zero should be the block direction |  | ||||||
| ////////////////////////////////////////////////////////////////////////// |  | ||||||
| template <class Field> |  | ||||||
| class MultiRHSConjugateGradient : public OperatorFunction<Field> { |  | ||||||
|  public: |  | ||||||
|  |  | ||||||
|   typedef typename Field::scalar_type scomplex; |  | ||||||
|  |  | ||||||
|   const int blockDim = 0; |  | ||||||
|  |  | ||||||
|   int Nblock; |  | ||||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. |  | ||||||
|                            // Defaults true. |  | ||||||
|   RealD Tolerance; |  | ||||||
|   Integer MaxIterations; |  | ||||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion |  | ||||||
|    |  | ||||||
|    MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) |  | ||||||
|     : Tolerance(tol), |  | ||||||
|     MaxIterations(maxit), |  | ||||||
|     ErrorOnNoConverge(err_on_no_conv){}; |  | ||||||
|  |  | ||||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  |  | ||||||
| { |  | ||||||
|   int Orthog = 0; // First dimension is block dim |  | ||||||
|   Nblock = Src._grid->_fdimensions[Orthog]; |  | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; |  | ||||||
|  |  | ||||||
|   Psi.checkerboard = Src.checkerboard; |  | ||||||
|   conformable(Psi, Src); |  | ||||||
|  |  | ||||||
|   Field P(Src); |  | ||||||
|   Field AP(Src); |  | ||||||
|   Field R(Src); |  | ||||||
|    |  | ||||||
|   std::vector<ComplexD> v_pAp(Nblock); |  | ||||||
|   std::vector<RealD> v_rr (Nblock); |  | ||||||
|   std::vector<RealD> v_rr_inv(Nblock); |  | ||||||
|   std::vector<RealD> v_alpha(Nblock); |  | ||||||
|   std::vector<RealD> v_beta(Nblock); |  | ||||||
|  |  | ||||||
|   // Initial residual computation & set up |  | ||||||
|   std::vector<RealD> residuals(Nblock); |  | ||||||
|   std::vector<RealD> ssq(Nblock); |  | ||||||
|  |  | ||||||
|   sliceNorm(ssq,Src,Orthog); |  | ||||||
|   RealD sssum=0; |  | ||||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,Src,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   sliceNorm(residuals,Psi,Orthog); |  | ||||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } |  | ||||||
|  |  | ||||||
|   // Initial search dir is guess |  | ||||||
|   Linop.HermOp(Psi, AP); |  | ||||||
|  |  | ||||||
|   R = Src - AP;   |  | ||||||
|   P = R; |  | ||||||
|   sliceNorm(v_rr,R,Orthog); |  | ||||||
|  |  | ||||||
|   GridStopWatch sliceInnerTimer; |  | ||||||
|   GridStopWatch sliceMaddTimer; |  | ||||||
|   GridStopWatch sliceNormTimer; |  | ||||||
|   GridStopWatch MatrixTimer; |  | ||||||
|   GridStopWatch SolverTimer; |  | ||||||
|  |  | ||||||
|   SolverTimer.Start(); |  | ||||||
|   int k; |  | ||||||
|   for (k = 1; k <= MaxIterations; k++) { |  | ||||||
|  |  | ||||||
|     RealD rrsum=0; |  | ||||||
|     for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]); |  | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum |  | ||||||
| 	      <<" / "<<std::sqrt(rrsum/sssum) <<std::endl; |  | ||||||
|  |  | ||||||
|     MatrixTimer.Start(); |  | ||||||
|     Linop.HermOp(P, AP); |  | ||||||
|     MatrixTimer.Stop(); |  | ||||||
|  |  | ||||||
|     // Alpha |  | ||||||
|     //    sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog); |  | ||||||
|     sliceInnerTimer.Start(); |  | ||||||
|     sliceInnerProductVector(v_pAp,P,AP,Orthog); |  | ||||||
|     sliceInnerTimer.Stop(); |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       //      std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl; |  | ||||||
|       v_alpha[b] = v_rr[b]/real(v_pAp[b]); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Psi, R update |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddVector(Psi,v_alpha, P,Psi,Orthog);     // add alpha *  P to psi |  | ||||||
|     sliceMaddVector(R  ,v_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     // Beta |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       v_rr_inv[b] = 1.0/v_rr[b]; |  | ||||||
|     } |  | ||||||
|     sliceNormTimer.Start(); |  | ||||||
|     sliceNorm(v_rr,R,Orthog); |  | ||||||
|     sliceNormTimer.Stop(); |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       v_beta[b] = v_rr_inv[b] *v_rr[b]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Search update |  | ||||||
|     sliceMaddTimer.Start(); |  | ||||||
|     sliceMaddVector(P,v_beta,P,R,Orthog); |  | ||||||
|     sliceMaddTimer.Stop(); |  | ||||||
|  |  | ||||||
|     /********************* |  | ||||||
|      * convergence monitor |  | ||||||
|      ********************* |  | ||||||
|      */ |  | ||||||
|     RealD max_resid=0; |  | ||||||
|     for(int b=0;b<Nblock;b++){ |  | ||||||
|       RealD rr = v_rr[b]/ssq[b]; |  | ||||||
|       if ( rr > max_resid ) max_resid = rr; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     if ( max_resid < Tolerance*Tolerance ) {  |  | ||||||
|  |  | ||||||
|       SolverTimer.Stop(); |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; |  | ||||||
|       for(int b=0;b<Nblock;b++){ |  | ||||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; |  | ||||||
|       } |  | ||||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; |  | ||||||
|  |  | ||||||
|       Linop.HermOp(Psi, AP); |  | ||||||
|       AP = AP-Src; |  | ||||||
|       std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; |  | ||||||
|  |  | ||||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tNorm       " << sliceNormTimer.Elapsed() <<std::endl; |  | ||||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|       IterationsToComplete = k; |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl; |  | ||||||
|  |  | ||||||
|   if (ErrorOnNoConverge) assert(0); |  | ||||||
|   IterationsToComplete = k; |  | ||||||
| } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| @@ -78,12 +78,18 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     cp = a; |     cp = a; | ||||||
|     ssq = norm2(src); |     ssq = norm2(src); | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   src " << ssq << std::endl; |               << "ConjugateGradient: guess " << guess << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:    mp " << d << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:   mmp " << b << std::endl; |               << "ConjugateGradient:   src " << ssq << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:  cp,r " << cp << std::endl; |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient:     p " << a << std::endl; |               << "ConjugateGradient:    mp " << d << std::endl; | ||||||
|  |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|  |               << "ConjugateGradient:   mmp " << b << std::endl; | ||||||
|  |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|  |               << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||||
|  |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|  |               << "ConjugateGradient:     p " << a << std::endl; | ||||||
|  |  | ||||||
|     RealD rsq = Tolerance * Tolerance * ssq; |     RealD rsq = Tolerance * Tolerance * ssq; | ||||||
|  |  | ||||||
| @@ -93,7 +99,8 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << std::setprecision(4) | ||||||
|               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; |               << "ConjugateGradient: k=0 residual " << cp << " target " << rsq | ||||||
|  |               << std::endl; | ||||||
|  |  | ||||||
|     GridStopWatch LinalgTimer; |     GridStopWatch LinalgTimer; | ||||||
|     GridStopWatch MatrixTimer; |     GridStopWatch MatrixTimer; | ||||||
| @@ -138,20 +145,19 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|         RealD resnorm = sqrt(norm2(p)); |         RealD resnorm = sqrt(norm2(p)); | ||||||
|         RealD true_residual = resnorm / srcnorm; |         RealD true_residual = resnorm / srcnorm; | ||||||
|  |  | ||||||
|         std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl; |         std::cout << GridLogMessage | ||||||
|         std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl; |                   << "ConjugateGradient: Converged on iteration " << k << std::endl; | ||||||
| 	std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; |         std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq) | ||||||
| 	std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; |                   << " true residual " << true_residual << " target " | ||||||
|  |                   << Tolerance << std::endl; | ||||||
|         std::cout << GridLogMessage << "Time breakdown "<<std::endl; |         std::cout << GridLogMessage << "Time elapsed: Iterations " | ||||||
| 	std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed() <<std::endl; |                   << SolverTimer.Elapsed() << " Matrix  " | ||||||
| 	std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed() <<std::endl; |                   << MatrixTimer.Elapsed() << " Linalg " | ||||||
| 	std::cout << GridLogMessage << "\tLinalg     " << LinalgTimer.Elapsed() <<std::endl; |                   << LinalgTimer.Elapsed(); | ||||||
|  |         std::cout << std::endl; | ||||||
|  |  | ||||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); |         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||||
|  |  | ||||||
| 	IterationsToComplete = k;	 | 	IterationsToComplete = k;	 | ||||||
|  |  | ||||||
|         return; |         return; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -30,17 +30,20 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #define GRID_IRL_H | #define GRID_IRL_H | ||||||
|  |  | ||||||
| #include <string.h> //memset | #include <string.h> //memset | ||||||
|  |  | ||||||
| #ifdef USE_LAPACK | #ifdef USE_LAPACK | ||||||
|  | #ifdef USE_MKL | ||||||
|  | #include<mkl_lapack.h> | ||||||
|  | #else | ||||||
| void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, | void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, | ||||||
|                    double *vl, double *vu, int *il, int *iu, double *abstol, |                    double *vl, double *vu, int *il, int *iu, double *abstol, | ||||||
|                    int *m, double *w, double *z, int *ldz, int *isuppz, |                    int *m, double *w, double *z, int *ldz, int *isuppz, | ||||||
|                    double *work, int *lwork, int *iwork, int *liwork, |                    double *work, int *lwork, int *iwork, int *liwork, | ||||||
|                    int *info); |                    int *info); | ||||||
|  | //#include <lapacke/lapacke.h> | ||||||
| #endif | #endif | ||||||
|  | #endif | ||||||
| #include <Grid/algorithms/densematrix/DenseMatrix.h> | #include "DenseMatrix.h" | ||||||
| #include <Grid/algorithms/iterative/EigenSort.h> | #include "EigenSort.h" | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| @@ -64,12 +67,13 @@ public: | |||||||
|     int Np;      // Np -- Number of spare vecs in kryloc space |     int Np;      // Np -- Number of spare vecs in kryloc space | ||||||
|     int Nm;      // Nm -- total number of vectors |     int Nm;      // Nm -- total number of vectors | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     RealD OrthoTime; | ||||||
|  |  | ||||||
|     RealD eresid; |     RealD eresid; | ||||||
|  |  | ||||||
|     SortEigen<Field> _sort; |     SortEigen<Field> _sort; | ||||||
|  |  | ||||||
| //    GridCartesian &_fgrid; |  | ||||||
|  |  | ||||||
|     LinearOperatorBase<Field> &_Linop; |     LinearOperatorBase<Field> &_Linop; | ||||||
|  |  | ||||||
|     OperatorFunction<Field>   &_poly; |     OperatorFunction<Field>   &_poly; | ||||||
| @@ -126,23 +130,23 @@ public: | |||||||
|  |  | ||||||
|       GridBase *grid = evec[0]._grid; |       GridBase *grid = evec[0]._grid; | ||||||
|       Field w(grid); |       Field w(grid); | ||||||
|       std::cout << "RitzMatrix "<<std::endl; |       std::cout<<GridLogMessage << "RitzMatrix "<<std::endl; | ||||||
|       for(int i=0;i<k;i++){ |       for(int i=0;i<k;i++){ | ||||||
| 	_poly(_Linop,evec[i],w); | 	_poly(_Linop,evec[i],w); | ||||||
| 	std::cout << "["<<i<<"] "; | 	std::cout<<GridLogMessage << "["<<i<<"] "; | ||||||
| 	for(int j=0;j<k;j++){ | 	for(int j=0;j<k;j++){ | ||||||
| 	  ComplexD in = innerProduct(evec[j],w); | 	  ComplexD in = innerProduct(evec[j],w); | ||||||
| 	  if ( fabs((double)i-j)>1 ) {  | 	  if ( fabs((double)i-j)>1 ) {  | ||||||
| 	    if (abs(in) >1.0e-9 )  {  | 	    if (abs(in) >1.0e-9 )  {  | ||||||
| 	      std::cout<<"oops"<<std::endl; | 	      std::cout<<GridLogMessage<<"oops"<<std::endl; | ||||||
| 	      abort(); | 	      abort(); | ||||||
| 	    } else  | 	    } else  | ||||||
| 	      std::cout << " 0 "; | 	      std::cout<<GridLogMessage << " 0 "; | ||||||
| 	  } else {  | 	  } else {  | ||||||
| 	    std::cout << " "<<in<<" "; | 	    std::cout<<GridLogMessage << " "<<in<<" "; | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| 	std::cout << std::endl; | 	std::cout<<GridLogMessage << std::endl; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -176,10 +180,10 @@ public: | |||||||
|       RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop |       RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop | ||||||
|                                  // 7. vk+1 := wk/βk+1 |                                  // 7. vk+1 := wk/βk+1 | ||||||
|  |  | ||||||
| //	std::cout << "alpha = " << zalph << " beta "<<beta<<std::endl; | 	std::cout<<GridLogMessage << "alpha = " << zalph << " beta "<<beta<<std::endl; | ||||||
|       const RealD tiny = 1.0e-20; |       const RealD tiny = 1.0e-20; | ||||||
|       if ( beta < tiny ) {  |       if ( beta < tiny ) {  | ||||||
| 	std::cout << " beta is tiny "<<beta<<std::endl; | 	std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl; | ||||||
|      } |      } | ||||||
|       lmd[k] = alph; |       lmd[k] = alph; | ||||||
|       lme[k]  = beta; |       lme[k]  = beta; | ||||||
| @@ -255,6 +259,7 @@ public: | |||||||
|     } |     } | ||||||
|  |  | ||||||
| #ifdef USE_LAPACK | #ifdef USE_LAPACK | ||||||
|  | #define LAPACK_INT long long | ||||||
|     void diagonalize_lapack(DenseVector<RealD>& lmd, |     void diagonalize_lapack(DenseVector<RealD>& lmd, | ||||||
| 		     DenseVector<RealD>& lme,  | 		     DenseVector<RealD>& lme,  | ||||||
| 		     int N1, | 		     int N1, | ||||||
| @@ -264,7 +269,7 @@ public: | |||||||
|   const int size = Nm; |   const int size = Nm; | ||||||
| //  tevals.resize(size); | //  tevals.resize(size); | ||||||
| //  tevecs.resize(size); | //  tevecs.resize(size); | ||||||
|   int NN = N1; |   LAPACK_INT NN = N1; | ||||||
|   double evals_tmp[NN]; |   double evals_tmp[NN]; | ||||||
|   double evec_tmp[NN][NN]; |   double evec_tmp[NN][NN]; | ||||||
|   memset(evec_tmp[0],0,sizeof(double)*NN*NN); |   memset(evec_tmp[0],0,sizeof(double)*NN*NN); | ||||||
| @@ -278,19 +283,19 @@ public: | |||||||
|         if (i==j) evals_tmp[i] = lmd[i]; |         if (i==j) evals_tmp[i] = lmd[i]; | ||||||
|         if (j==(i-1)) EE[j] = lme[j]; |         if (j==(i-1)) EE[j] = lme[j]; | ||||||
|       } |       } | ||||||
|   int evals_found; |   LAPACK_INT evals_found; | ||||||
|   int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; |   LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ; | ||||||
|   int liwork =  3+NN*10 ; |   LAPACK_INT liwork =  3+NN*10 ; | ||||||
|   int iwork[liwork]; |   LAPACK_INT iwork[liwork]; | ||||||
|   double work[lwork]; |   double work[lwork]; | ||||||
|   int isuppz[2*NN]; |   LAPACK_INT isuppz[2*NN]; | ||||||
|   char jobz = 'V'; // calculate evals & evecs |   char jobz = 'V'; // calculate evals & evecs | ||||||
|   char range = 'I'; // calculate all evals |   char range = 'I'; // calculate all evals | ||||||
|   //    char range = 'A'; // calculate all evals |   //    char range = 'A'; // calculate all evals | ||||||
|   char uplo = 'U'; // refer to upper half of original matrix |   char uplo = 'U'; // refer to upper half of original matrix | ||||||
|   char compz = 'I'; // Compute eigenvectors of tridiagonal matrix |   char compz = 'I'; // Compute eigenvectors of tridiagonal matrix | ||||||
|   int ifail[NN]; |   int ifail[NN]; | ||||||
|   int info; |   long long info; | ||||||
| //  int total = QMP_get_number_of_nodes(); | //  int total = QMP_get_number_of_nodes(); | ||||||
| //  int node = QMP_get_node_number(); | //  int node = QMP_get_node_number(); | ||||||
| //  GridBase *grid = evec[0]._grid; | //  GridBase *grid = evec[0]._grid; | ||||||
| @@ -298,14 +303,18 @@ public: | |||||||
|   int node = grid->_processor; |   int node = grid->_processor; | ||||||
|   int interval = (NN/total)+1; |   int interval = (NN/total)+1; | ||||||
|   double vl = 0.0, vu = 0.0; |   double vl = 0.0, vu = 0.0; | ||||||
|   int il = interval*node+1 , iu = interval*(node+1); |   LAPACK_INT il = interval*node+1 , iu = interval*(node+1); | ||||||
|   if (iu > NN)  iu=NN; |   if (iu > NN)  iu=NN; | ||||||
|   double tol = 0.0; |   double tol = 0.0; | ||||||
|     if (1) { |     if (1) { | ||||||
|       memset(evals_tmp,0,sizeof(double)*NN); |       memset(evals_tmp,0,sizeof(double)*NN); | ||||||
|       if ( il <= NN){ |       if ( il <= NN){ | ||||||
|         printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); |         printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu); | ||||||
|  | #ifdef USE_MKL | ||||||
|  |         dstegr(&jobz, &range, &NN, | ||||||
|  | #else | ||||||
|         LAPACK_dstegr(&jobz, &range, &NN, |         LAPACK_dstegr(&jobz, &range, &NN, | ||||||
|  | #endif | ||||||
|             (double*)DD, (double*)EE, |             (double*)DD, (double*)EE, | ||||||
|             &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' |             &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A' | ||||||
|             &tol, // tolerance |             &tol, // tolerance | ||||||
| @@ -337,6 +346,7 @@ public: | |||||||
|       lmd [NN-1-i]=evals_tmp[i]; |       lmd [NN-1-i]=evals_tmp[i]; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  | #undef LAPACK_INT  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -367,12 +377,14 @@ public: | |||||||
| //	diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); | //	diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|       int Niter = 100*N1; |       int Niter = 10000*N1; | ||||||
|       int kmin = 1; |       int kmin = 1; | ||||||
|       int kmax = N2; |       int kmax = N2; | ||||||
|       // (this should be more sophisticated) |       // (this should be more sophisticated) | ||||||
|  |  | ||||||
|       for(int iter=0; iter<Niter; ++iter){ |       for(int iter=0; ; ++iter){ | ||||||
|  |       if ( (iter+1)%(100*N1)==0)  | ||||||
|  |       std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n"; | ||||||
|  |  | ||||||
| 	// determination of 2x2 leading submatrix | 	// determination of 2x2 leading submatrix | ||||||
| 	RealD dsub = lmd[kmax-1]-lmd[kmax-2]; | 	RealD dsub = lmd[kmax-1]-lmd[kmax-2]; | ||||||
| @@ -401,11 +413,11 @@ public: | |||||||
|         _sort.push(lmd3,N2); |         _sort.push(lmd3,N2); | ||||||
|         _sort.push(lmd2,N2); |         _sort.push(lmd2,N2); | ||||||
|          for(int k=0; k<N2; ++k){ |          for(int k=0; k<N2; ++k){ | ||||||
| 	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl; | 	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl; | ||||||
| //	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl; | //	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl; | ||||||
| 	  } | 	  } | ||||||
|          for(int k=0; k<N1*N1; ++k){ |          for(int k=0; k<N1*N1; ++k){ | ||||||
| //	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl; | //	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl; | ||||||
| 	} | 	} | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
| @@ -420,7 +432,7 @@ public: | |||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
|       std::cout << "[QL method] Error - Too many iteration: "<<Niter<<"\n"; |       std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n"; | ||||||
|       abort(); |       abort(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -437,6 +449,7 @@ public: | |||||||
| 		       DenseVector<Field>& evec, | 		       DenseVector<Field>& evec, | ||||||
| 		       int k) | 		       int k) | ||||||
|     { |     { | ||||||
|  |       double t0=-usecond()/1e6; | ||||||
|       typedef typename Field::scalar_type MyComplex; |       typedef typename Field::scalar_type MyComplex; | ||||||
|       MyComplex ip; |       MyComplex ip; | ||||||
|  |  | ||||||
| @@ -455,6 +468,8 @@ public: | |||||||
| 	w = w - ip * evec[j]; | 	w = w - ip * evec[j]; | ||||||
|       } |       } | ||||||
|       normalise(w); |       normalise(w); | ||||||
|  |       t0+=usecond()/1e6; | ||||||
|  |       OrthoTime +=t0; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void setUnit_Qt(int Nm, DenseVector<RealD> &Qt) { |     void setUnit_Qt(int Nm, DenseVector<RealD> &Qt) { | ||||||
| @@ -488,10 +503,10 @@ until convergence | |||||||
| 	GridBase *grid = evec[0]._grid; | 	GridBase *grid = evec[0]._grid; | ||||||
| 	assert(grid == src._grid); | 	assert(grid == src._grid); | ||||||
|  |  | ||||||
| 	std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl; | 	std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl; | ||||||
| 	std::cout << " -- Nm = " << Nm << std::endl; | 	std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl; | ||||||
| 	std::cout << " -- size of eval   = " << eval.size() << std::endl; | 	std::cout<<GridLogMessage << " -- size of eval   = " << eval.size() << std::endl; | ||||||
| 	std::cout << " -- size of evec  = " << evec.size() << std::endl; | 	std::cout<<GridLogMessage << " -- size of evec  = " << evec.size() << std::endl; | ||||||
| 	 | 	 | ||||||
| 	assert(Nm == evec.size() && Nm == eval.size()); | 	assert(Nm == evec.size() && Nm == eval.size()); | ||||||
| 	 | 	 | ||||||
| @@ -502,6 +517,7 @@ until convergence | |||||||
| 	DenseVector<int>   Iconv(Nm); | 	DenseVector<int>   Iconv(Nm); | ||||||
|  |  | ||||||
| 	DenseVector<Field>  B(Nm,grid); // waste of space replicating | 	DenseVector<Field>  B(Nm,grid); // waste of space replicating | ||||||
|  | //	DenseVector<Field>  Btemp(Nm,grid); // waste of space replicating | ||||||
| 	 | 	 | ||||||
| 	Field f(grid); | 	Field f(grid); | ||||||
| 	Field v(grid); | 	Field v(grid); | ||||||
| @@ -517,35 +533,48 @@ until convergence | |||||||
| 	// (uniform vector) Why not src?? | 	// (uniform vector) Why not src?? | ||||||
| 	//	evec[0] = 1.0; | 	//	evec[0] = 1.0; | ||||||
| 	evec[0] = src; | 	evec[0] = src; | ||||||
| 	std:: cout <<"norm2(src)= " << norm2(src)<<std::endl; | 	std:: cout<<GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl; | ||||||
| // << src._grid  << std::endl; | // << src._grid  << std::endl; | ||||||
| 	normalise(evec[0]); | 	normalise(evec[0]); | ||||||
| 	std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl; | 	std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl; | ||||||
| // << evec[0]._grid << std::endl; | // << evec[0]._grid << std::endl; | ||||||
| 	 | 	 | ||||||
| 	// Initial Nk steps | 	// Initial Nk steps | ||||||
|  | 	OrthoTime=0.; | ||||||
|  | 	double t0=usecond()/1e6; | ||||||
| 	for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); | 	for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k); | ||||||
| //	std:: cout <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl; | 	double t1=usecond()/1e6; | ||||||
| //	std:: cout <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl; | 	std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; | ||||||
|  | //	std:: cout<<GridLogMessage <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl; | ||||||
|  | //	std:: cout<<GridLogMessage <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl; | ||||||
| 	RitzMatrix(evec,Nk); | 	RitzMatrix(evec,Nk); | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
| 	for(int k=0; k<Nk; ++k){ | 	for(int k=0; k<Nk; ++k){ | ||||||
| //	std:: cout <<"eval " << k << " " <<eval[k] << std::endl; | //	std:: cout<<GridLogMessage <<"eval " << k << " " <<eval[k] << std::endl; | ||||||
| //	std:: cout <<"lme " << k << " " << lme[k] << std::endl; | //	std:: cout<<GridLogMessage <<"lme " << k << " " << lme[k] << std::endl; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// Restarting loop begins | 	// Restarting loop begins | ||||||
| 	for(int iter = 0; iter<Niter; ++iter){ | 	for(int iter = 0; iter<Niter; ++iter){ | ||||||
|  |  | ||||||
| 	  std::cout<<"\n Restart iteration = "<< iter << std::endl; | 	  std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl; | ||||||
|  |  | ||||||
| 	  //  | 	  //  | ||||||
| 	  // Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs. | 	  // Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs. | ||||||
| 	  // We loop over  | 	  // We loop over  | ||||||
| 	  // | 	  // | ||||||
|  | 	OrthoTime=0.; | ||||||
| 	  for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); | 	  for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k); | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl; | ||||||
| 	  f *= lme[Nm-1]; | 	  f *= lme[Nm-1]; | ||||||
|  |  | ||||||
| 	  RitzMatrix(evec,k2); | 	  RitzMatrix(evec,k2); | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL:: RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
| 	   | 	   | ||||||
| 	  // getting eigenvalues | 	  // getting eigenvalues | ||||||
| 	  for(int k=0; k<Nm; ++k){ | 	  for(int k=0; k<Nm; ++k){ | ||||||
| @@ -554,18 +583,27 @@ until convergence | |||||||
| 	  } | 	  } | ||||||
| 	  setUnit_Qt(Nm,Qt); | 	  setUnit_Qt(Nm,Qt); | ||||||
| 	  diagonalize(eval2,lme2,Nm,Nm,Qt,grid); | 	  diagonalize(eval2,lme2,Nm,Nm,Qt,grid); | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
|  |  | ||||||
| 	  // sorting | 	  // sorting | ||||||
| 	  _sort.push(eval2,Nm); | 	  _sort.push(eval2,Nm); | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
| 	   | 	   | ||||||
| 	  // Implicitly shifted QR transformations | 	  // Implicitly shifted QR transformations | ||||||
| 	  setUnit_Qt(Nm,Qt); | 	  setUnit_Qt(Nm,Qt); | ||||||
|  | 	  for(int ip=0; ip<k2; ++ip){ | ||||||
|  | 	std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl; | ||||||
|  | 	} | ||||||
| 	  for(int ip=k2; ip<Nm; ++ip){  | 	  for(int ip=k2; ip<Nm; ++ip){  | ||||||
| 	std::cout << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl; | 	std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl; | ||||||
| 	    qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); | 	    qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm); | ||||||
| 		 | 		 | ||||||
| 	} | 	} | ||||||
|      | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
|  | if (0) {   | ||||||
| 	  for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; | 	  for(int i=0; i<(Nk+1); ++i) B[i] = 0.0; | ||||||
| 	   | 	   | ||||||
| 	  for(int j=k1-1; j<k2+1; ++j){ | 	  for(int j=k1-1; j<k2+1; ++j){ | ||||||
| @@ -573,6 +611,30 @@ until convergence | |||||||
| 	    B[j].checkerboard = evec[k].checkerboard; | 	    B[j].checkerboard = evec[k].checkerboard; | ||||||
| 	      B[j] += Qt[k+Nm*j] * evec[k]; | 	      B[j] += Qt[k+Nm*j] * evec[k]; | ||||||
| 	    } | 	    } | ||||||
|  | 	  } | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::QR Rotate: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | if (1) { | ||||||
|  | 	for(int i=0; i<(Nk+1); ++i) { | ||||||
|  | 		B[i] = 0.0; | ||||||
|  | 	  	B[i].checkerboard = evec[0].checkerboard; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	int j_block = 24; int k_block=24; | ||||||
|  | PARALLEL_FOR_LOOP | ||||||
|  | 	for(int ss=0;ss < grid->oSites();ss++){ | ||||||
|  | 	for(int jj=k1-1; jj<k2+1; jj += j_block) | ||||||
|  | 	for(int kk=0; kk<Nm; kk += k_block) | ||||||
|  | 	for(int j=jj; (j<(k2+1)) && j<(jj+j_block); ++j){ | ||||||
|  | 	for(int k=kk; (k<Nm) && k<(kk+k_block) ; ++k){ | ||||||
|  | 	    B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss];  | ||||||
|  | 	} | ||||||
|  | 	} | ||||||
|  | 	} | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
| } | } | ||||||
| 	for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j]; | 	for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j]; | ||||||
|  |  | ||||||
| @@ -581,7 +643,7 @@ until convergence | |||||||
| 	  f += lme[k2-1] * evec[k2]; | 	  f += lme[k2-1] * evec[k2]; | ||||||
| 	  beta_k = norm2(f); | 	  beta_k = norm2(f); | ||||||
| 	  beta_k = sqrt(beta_k); | 	  beta_k = sqrt(beta_k); | ||||||
| 	  std::cout<<" beta(k) = "<<beta_k<<std::endl; | 	  std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl; | ||||||
|  |  | ||||||
| 	  RealD betar = 1.0/beta_k; | 	  RealD betar = 1.0/beta_k; | ||||||
| 	  evec[k2] = betar * f; | 	  evec[k2] = betar * f; | ||||||
| @@ -594,7 +656,10 @@ until convergence | |||||||
| 	  } | 	  } | ||||||
| 	  setUnit_Qt(Nm,Qt); | 	  setUnit_Qt(Nm,Qt); | ||||||
| 	  diagonalize(eval2,lme2,Nk,Nm,Qt,grid); | 	  diagonalize(eval2,lme2,Nk,Nm,Qt,grid); | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
| 	   | 	   | ||||||
|  | if (0) { | ||||||
| 	  for(int k = 0; k<Nk; ++k) B[k]=0.0; | 	  for(int k = 0; k<Nk; ++k) B[k]=0.0; | ||||||
| 	   | 	   | ||||||
| 	  for(int j = 0; j<Nk; ++j){ | 	  for(int j = 0; j<Nk; ++j){ | ||||||
| @@ -602,12 +667,34 @@ until convergence | |||||||
| 	    B[j].checkerboard = evec[k].checkerboard; | 	    B[j].checkerboard = evec[k].checkerboard; | ||||||
| 	      B[j] += Qt[k+j*Nm] * evec[k]; | 	      B[j] += Qt[k+j*Nm] * evec[k]; | ||||||
| 	    } | 	    } | ||||||
| //	    std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl; | 	    std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl; | ||||||
|  | 	  } | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::Convergence rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
|  | } | ||||||
|  | if (1) { | ||||||
|  | 	for(int i=0; i<(Nk+1); ++i) { | ||||||
|  | 		B[i] = 0.0; | ||||||
|  | 	  	B[i].checkerboard = evec[0].checkerboard; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	int j_block = 24; int k_block=24; | ||||||
|  | PARALLEL_FOR_LOOP | ||||||
|  | 	for(int ss=0;ss < grid->oSites();ss++){ | ||||||
|  | 	for(int jj=0; jj<Nk; jj += j_block) | ||||||
|  | 	for(int kk=0; kk<Nk; kk += k_block) | ||||||
|  | 	for(int j=jj; (j<Nk) && j<(jj+j_block); ++j){ | ||||||
|  | 	for(int k=kk; (k<Nk) && k<(kk+k_block) ; ++k){ | ||||||
|  | 	    B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss];  | ||||||
|  | 	} | ||||||
|  | 	} | ||||||
|  | 	} | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::convergence rotation : "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
| } | } | ||||||
| //	_sort.push(eval2,B,Nk); |  | ||||||
|  |  | ||||||
| 	  Nconv = 0; | 	  Nconv = 0; | ||||||
| 	  //	  std::cout << std::setiosflags(std::ios_base::scientific); | 	  //	  std::cout<<GridLogMessage << std::setiosflags(std::ios_base::scientific); | ||||||
| 	  for(int i=0; i<Nk; ++i){ | 	  for(int i=0; i<Nk; ++i){ | ||||||
|  |  | ||||||
| //	    _poly(_Linop,B[i],v); | //	    _poly(_Linop,B[i],v); | ||||||
| @@ -615,14 +702,16 @@ until convergence | |||||||
| 	     | 	     | ||||||
| 	    RealD vnum = real(innerProduct(B[i],v)); // HermOp. | 	    RealD vnum = real(innerProduct(B[i],v)); // HermOp. | ||||||
| 	    RealD vden = norm2(B[i]); | 	    RealD vden = norm2(B[i]); | ||||||
|  | 	    RealD vv0 = norm2(v); | ||||||
| 	    eval2[i] = vnum/vden; | 	    eval2[i] = vnum/vden; | ||||||
| 	    v -= eval2[i]*B[i]; | 	    v -= eval2[i]*B[i]; | ||||||
| 	    RealD vv = norm2(v); | 	    RealD vv = norm2(v); | ||||||
| 	     | 	     | ||||||
| 	    std::cout.precision(13); | 	    std::cout.precision(13); | ||||||
| 	    std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] "; | 	    std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] "; | ||||||
| 	    std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i]; | 	    std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i]; | ||||||
| 	    std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl; | 	    std::cout<<"|H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv; | ||||||
|  | 	    std::cout<<" "<< vnum/(sqrt(vden)*sqrt(vv0)) << std::endl; | ||||||
| 	     | 	     | ||||||
| 	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged | 	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged | ||||||
| 	    if((vv<eresid*eresid) && (i == Nconv) ){ | 	    if((vv<eresid*eresid) && (i == Nconv) ){ | ||||||
| @@ -631,17 +720,19 @@ until convergence | |||||||
| 	    } | 	    } | ||||||
|  |  | ||||||
| 	  }  // i-loop end | 	  }  // i-loop end | ||||||
| 	  //	  std::cout << std::resetiosflags(std::ios_base::scientific); | 	  //	  std::cout<<GridLogMessage << std::resetiosflags(std::ios_base::scientific); | ||||||
|  | 	t1=usecond()/1e6; | ||||||
|  | 	std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1; | ||||||
|  |  | ||||||
|  |  | ||||||
| 	  std::cout<<" #modes converged: "<<Nconv<<std::endl; | 	  std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl; | ||||||
|  |  | ||||||
| 	  if( Nconv>=Nstop ){ | 	  if( Nconv>=Nstop ){ | ||||||
| 	    goto converged; | 	    goto converged; | ||||||
| 	  } | 	  } | ||||||
| 	} // end of iter loop | 	} // end of iter loop | ||||||
| 	 | 	 | ||||||
| 	std::cout<<"\n NOT converged.\n"; | 	std::cout<<GridLogMessage<<"\n NOT converged.\n"; | ||||||
| 	abort(); | 	abort(); | ||||||
| 	 | 	 | ||||||
|       converged: |       converged: | ||||||
| @@ -654,10 +745,10 @@ until convergence | |||||||
|        } |        } | ||||||
|       _sort.push(eval,evec,Nconv); |       _sort.push(eval,evec,Nconv); | ||||||
|  |  | ||||||
|       std::cout << "\n Converged\n Summary :\n"; |       std::cout<<GridLogMessage << "\n Converged\n Summary :\n"; | ||||||
|       std::cout << " -- Iterations  = "<< Nconv  << "\n"; |       std::cout<<GridLogMessage << " -- Iterations  = "<< Nconv  << "\n"; | ||||||
|       std::cout << " -- beta(k)     = "<< beta_k << "\n"; |       std::cout<<GridLogMessage << " -- beta(k)     = "<< beta_k << "\n"; | ||||||
|       std::cout << " -- Nconv       = "<< Nconv  << "\n"; |       std::cout<<GridLogMessage << " -- Nconv       = "<< Nconv  << "\n"; | ||||||
|      } |      } | ||||||
|  |  | ||||||
|     ///////////////////////////////////////////////// |     ///////////////////////////////////////////////// | ||||||
| @@ -680,25 +771,25 @@ until convergence | |||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       std::cout<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl; |       std::cout<<GridLogMessage<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl; | ||||||
|  |  | ||||||
|       // Starting from scratch, bq[0] contains a random vector and |bq[0]| = 1 |       // Starting from scratch, bq[0] contains a random vector and |bq[0]| = 1 | ||||||
|       int first; |       int first; | ||||||
|       if(start == 0){ |       if(start == 0){ | ||||||
|  |  | ||||||
| 	std::cout << "start == 0\n"; //TESTING | 	std::cout<<GridLogMessage << "start == 0\n"; //TESTING | ||||||
|  |  | ||||||
| 	_poly(_Linop,bq[0],bf); | 	_poly(_Linop,bq[0],bf); | ||||||
|  |  | ||||||
| 	alpha = real(innerProduct(bq[0],bf));//alpha =  bq[0]^dag A bq[0] | 	alpha = real(innerProduct(bq[0],bf));//alpha =  bq[0]^dag A bq[0] | ||||||
|  |  | ||||||
| 	std::cout << "alpha = " << alpha << std::endl; | 	std::cout<<GridLogMessage << "alpha = " << alpha << std::endl; | ||||||
| 	 | 	 | ||||||
| 	bf = bf - alpha * bq[0];  //bf =  A bq[0] - alpha bq[0] | 	bf = bf - alpha * bq[0];  //bf =  A bq[0] - alpha bq[0] | ||||||
|  |  | ||||||
| 	H[0][0]=alpha; | 	H[0][0]=alpha; | ||||||
|  |  | ||||||
| 	std::cout << "Set H(0,0) to " << H[0][0] << std::endl; | 	std::cout<<GridLogMessage << "Set H(0,0) to " << H[0][0] << std::endl; | ||||||
|  |  | ||||||
| 	first = 1; | 	first = 1; | ||||||
|  |  | ||||||
| @@ -718,19 +809,19 @@ until convergence | |||||||
|  |  | ||||||
| 	beta = 0;sqbt = 0; | 	beta = 0;sqbt = 0; | ||||||
|  |  | ||||||
| 	std::cout << "cont is true so setting beta to zero\n"; | 	std::cout<<GridLogMessage << "cont is true so setting beta to zero\n"; | ||||||
|  |  | ||||||
|       }	else { |       }	else { | ||||||
|  |  | ||||||
| 	beta = norm2(bf); | 	beta = norm2(bf); | ||||||
| 	sqbt = sqrt(beta); | 	sqbt = sqrt(beta); | ||||||
|  |  | ||||||
| 	std::cout << "beta = " << beta << std::endl; | 	std::cout<<GridLogMessage << "beta = " << beta << std::endl; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       for(int j=first;j<end;j++){ |       for(int j=first;j<end;j++){ | ||||||
|  |  | ||||||
| 	std::cout << "Factor j " << j <<std::endl; | 	std::cout<<GridLogMessage << "Factor j " << j <<std::endl; | ||||||
|  |  | ||||||
| 	if(cont){ // switches to factoring; understand start!=0 and initial bf value is right. | 	if(cont){ // switches to factoring; understand start!=0 and initial bf value is right. | ||||||
| 	  bq[j] = bf; cont = false; | 	  bq[j] = bf; cont = false; | ||||||
| @@ -753,7 +844,7 @@ until convergence | |||||||
|  |  | ||||||
| 	beta = fnorm; | 	beta = fnorm; | ||||||
| 	sqbt = sqrt(beta); | 	sqbt = sqrt(beta); | ||||||
| 	std::cout << "alpha = " << alpha << " fnorm = " << fnorm << '\n'; | 	std::cout<<GridLogMessage << "alpha = " << alpha << " fnorm = " << fnorm << '\n'; | ||||||
|  |  | ||||||
| 	///Iterative refinement of orthogonality V = [ bq[0]  bq[1]  ...  bq[M] ] | 	///Iterative refinement of orthogonality V = [ bq[0]  bq[1]  ...  bq[M] ] | ||||||
| 	int re = 0; | 	int re = 0; | ||||||
| @@ -788,8 +879,8 @@ until convergence | |||||||
| 	  bck = sqrt( nmbex ); | 	  bck = sqrt( nmbex ); | ||||||
| 	  re++; | 	  re++; | ||||||
| 	} | 	} | ||||||
| 	std::cout << "Iteratively refined orthogonality, changes alpha\n"; | 	std::cout<<GridLogMessage << "Iteratively refined orthogonality, changes alpha\n"; | ||||||
| 	if(re > 1) std::cout << "orthagonality refined " << re << " times" <<std::endl; | 	if(re > 1) std::cout<<GridLogMessage << "orthagonality refined " << re << " times" <<std::endl; | ||||||
| 	H[j][j]=alpha; | 	H[j][j]=alpha; | ||||||
|       } |       } | ||||||
|  |  | ||||||
| @@ -804,11 +895,13 @@ until convergence | |||||||
|  |  | ||||||
|     void ImplicitRestart(int TM, DenseVector<RealD> &evals,  DenseVector<DenseVector<RealD> > &evecs, DenseVector<Field> &bq, Field &bf, int cont) |     void ImplicitRestart(int TM, DenseVector<RealD> &evals,  DenseVector<DenseVector<RealD> > &evecs, DenseVector<Field> &bq, Field &bf, int cont) | ||||||
|     { |     { | ||||||
|       std::cout << "ImplicitRestart begin. Eigensort starting\n"; |       std::cout<<GridLogMessage << "ImplicitRestart begin. Eigensort starting\n"; | ||||||
|  |  | ||||||
|       DenseMatrix<RealD> H; Resize(H,Nm,Nm); |       DenseMatrix<RealD> H; Resize(H,Nm,Nm); | ||||||
|  |  | ||||||
|  | #ifndef USE_LAPACK | ||||||
|       EigenSort(evals, evecs); |       EigenSort(evals, evecs); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|       ///Assign shifts |       ///Assign shifts | ||||||
|       int K=Nk; |       int K=Nk; | ||||||
| @@ -831,15 +924,15 @@ until convergence | |||||||
|       /// Shifted H defines a new K step Arnoldi factorization |       /// Shifted H defines a new K step Arnoldi factorization | ||||||
|       RealD  beta = H[ff][ff-1];  |       RealD  beta = H[ff][ff-1];  | ||||||
|       RealD  sig  = Q[TM - 1][ff - 1]; |       RealD  sig  = Q[TM - 1][ff - 1]; | ||||||
|       std::cout << "beta = " << beta << " sig = " << real(sig) <<std::endl; |       std::cout<<GridLogMessage << "beta = " << beta << " sig = " << real(sig) <<std::endl; | ||||||
|  |  | ||||||
|       std::cout << "TM = " << TM << " "; |       std::cout<<GridLogMessage << "TM = " << TM << " "; | ||||||
|       std::cout << norm2(bq[0]) << " -- before" <<std::endl; |       std::cout<<GridLogMessage << norm2(bq[0]) << " -- before" <<std::endl; | ||||||
|  |  | ||||||
|       /// q -> q Q |       /// q -> q Q | ||||||
|       times_real(bq, Q, TM); |       times_real(bq, Q, TM); | ||||||
|  |  | ||||||
|       std::cout << norm2(bq[0]) << " -- after " << ff <<std::endl; |       std::cout<<GridLogMessage << norm2(bq[0]) << " -- after " << ff <<std::endl; | ||||||
|       bf =  beta* bq[ff] + sig* bf; |       bf =  beta* bq[ff] + sig* bf; | ||||||
|  |  | ||||||
|       /// Do the rest of the factorization |       /// Do the rest of the factorization | ||||||
| @@ -863,7 +956,7 @@ until convergence | |||||||
|       int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with |       int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with | ||||||
|  |  | ||||||
|       if(ff < M) { |       if(ff < M) { | ||||||
| 	std::cout << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl; | 	std::cout<<GridLogMessage << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl; | ||||||
| 	abort(); // Why would this happen? | 	abort(); // Why would this happen? | ||||||
|       } |       } | ||||||
|  |  | ||||||
| @@ -872,7 +965,7 @@ until convergence | |||||||
|  |  | ||||||
|       for(int it = 0; it < Niter && (converged < Nk); ++it) { |       for(int it = 0; it < Niter && (converged < Nk); ++it) { | ||||||
|  |  | ||||||
| 	std::cout << "Krylov: Iteration --> " << it << std::endl; | 	std::cout<<GridLogMessage << "Krylov: Iteration --> " << it << std::endl; | ||||||
| 	int lock_num = lock ? converged : 0; | 	int lock_num = lock ? converged : 0; | ||||||
| 	DenseVector<RealD> tevals(M - lock_num ); | 	DenseVector<RealD> tevals(M - lock_num ); | ||||||
| 	DenseMatrix<RealD> tevecs; Resize(tevecs,M - lock_num,M - lock_num); | 	DenseMatrix<RealD> tevecs; Resize(tevecs,M - lock_num,M - lock_num); | ||||||
| @@ -888,7 +981,7 @@ until convergence | |||||||
|       Wilkinson<RealD>(H, evals, evecs, small);  |       Wilkinson<RealD>(H, evals, evecs, small);  | ||||||
|       //      Check(); |       //      Check(); | ||||||
|  |  | ||||||
|       std::cout << "Done  "<<std::endl; |       std::cout<<GridLogMessage << "Done  "<<std::endl; | ||||||
|  |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -953,7 +1046,7 @@ until convergence | |||||||
| 		  DenseVector<RealD> &tevals, DenseVector<DenseVector<RealD> > &tevecs,  | 		  DenseVector<RealD> &tevals, DenseVector<DenseVector<RealD> > &tevecs,  | ||||||
| 		  int lock, int converged) | 		  int lock, int converged) | ||||||
|     { |     { | ||||||
|       std::cout << "Converged " << converged << " so far." << std::endl; |       std::cout<<GridLogMessage << "Converged " << converged << " so far." << std::endl; | ||||||
|       int lock_num = lock ? converged : 0; |       int lock_num = lock ? converged : 0; | ||||||
|       int M = Nm; |       int M = Nm; | ||||||
|  |  | ||||||
| @@ -968,7 +1061,9 @@ until convergence | |||||||
|       RealD small=1.0e-16; |       RealD small=1.0e-16; | ||||||
|       Wilkinson<RealD>(AH, tevals, tevecs, small); |       Wilkinson<RealD>(AH, tevals, tevecs, small); | ||||||
|  |  | ||||||
|  | #ifndef USE_LAPACK | ||||||
|       EigenSort(tevals, tevecs); |       EigenSort(tevals, tevecs); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|       RealD resid_nrm=  norm2(bf); |       RealD resid_nrm=  norm2(bf); | ||||||
|  |  | ||||||
| @@ -979,7 +1074,7 @@ until convergence | |||||||
| 	RealD diff = 0; | 	RealD diff = 0; | ||||||
| 	diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm; | 	diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm; | ||||||
|  |  | ||||||
| 	std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; | 	std::cout<<GridLogMessage << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl; | ||||||
|  |  | ||||||
| 	if(diff < converged) { | 	if(diff < converged) { | ||||||
|  |  | ||||||
| @@ -995,13 +1090,13 @@ until convergence | |||||||
| 	    lock_num++; | 	    lock_num++; | ||||||
| 	  } | 	  } | ||||||
| 	  converged++; | 	  converged++; | ||||||
| 	  std::cout << " converged on eval " << converged << " of " << Nk << std::endl; | 	  std::cout<<GridLogMessage << " converged on eval " << converged << " of " << Nk << std::endl; | ||||||
| 	} else { | 	} else { | ||||||
| 	  break; | 	  break; | ||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
| #endif | #endif | ||||||
|       std::cout << "Got " << converged << " so far " <<std::endl;	 |       std::cout<<GridLogMessage << "Got " << converged << " so far " <<std::endl;	 | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     ///Check |     ///Check | ||||||
| @@ -1010,7 +1105,9 @@ until convergence | |||||||
|  |  | ||||||
|       DenseVector<RealD> goodval(this->get); |       DenseVector<RealD> goodval(this->get); | ||||||
|  |  | ||||||
|  | #ifndef USE_LAPACK | ||||||
|       EigenSort(evals,evecs); |       EigenSort(evals,evecs); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|       int NM = Nm; |       int NM = Nm; | ||||||
|  |  | ||||||
| @@ -1082,14 +1179,16 @@ say con = 2 | |||||||
| **/ | **/ | ||||||
|  |  | ||||||
| template<class T> | template<class T> | ||||||
| static void Lock(DenseMatrix<T> &H, 	// Hess mtx	 | static void Lock(DenseMatrix<T> &H, 	///Hess mtx	 | ||||||
| 		 DenseMatrix<T> &Q, 	// Lock Transform | 		 DenseMatrix<T> &Q, 	///Lock Transform | ||||||
| 		 T val, 		// value to be locked | 		 T val, 		///value to be locked | ||||||
| 		 int con, 	// number already locked | 		 int con, 	///number already locked | ||||||
| 		 RealD small, | 		 RealD small, | ||||||
| 		 int dfg, | 		 int dfg, | ||||||
| 		 bool herm) | 		 bool herm) | ||||||
| {	 | {	 | ||||||
|  |  | ||||||
|  |  | ||||||
|   //ForceTridiagonal(H); |   //ForceTridiagonal(H); | ||||||
|  |  | ||||||
|   int M = H.dim; |   int M = H.dim; | ||||||
| @@ -1122,6 +1221,7 @@ static void Lock(DenseMatrix<T> &H, 	// Hess mtx | |||||||
|   AH = Hermitian(QQ)*AH; |   AH = Hermitian(QQ)*AH; | ||||||
|   AH = AH*QQ; |   AH = AH*QQ; | ||||||
| 	 | 	 | ||||||
|  |  | ||||||
|   for(int i=con;i<M;i++){ |   for(int i=con;i<M;i++){ | ||||||
|     for(int j=con;j<M;j++){ |     for(int j=con;j<M;j++){ | ||||||
|       Q[i][j]=QQ[i-con][j-con]; |       Q[i][j]=QQ[i-con][j-con]; | ||||||
|   | |||||||
							
								
								
									
										453
									
								
								lib/algorithms/iterative/Matrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										453
									
								
								lib/algorithms/iterative/Matrix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,453 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/iterative/Matrix.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef MATRIX_H | ||||||
|  | #define MATRIX_H | ||||||
|  |  | ||||||
|  | #include <cstdlib> | ||||||
|  | #include <string> | ||||||
|  | #include <cmath> | ||||||
|  | #include <vector> | ||||||
|  | #include <iostream> | ||||||
|  | #include <iomanip> | ||||||
|  | #include <complex> | ||||||
|  | #include <typeinfo> | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /** Sign function **/ | ||||||
|  | template <class T> T sign(T p){return ( p/abs(p) );} | ||||||
|  |  | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | ///////////////////// Hijack STL containers for our wicked means ///////////////////////////////////////// | ||||||
|  | ///////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | template<class T> using Vector = Vector<T>; | ||||||
|  | template<class T> using Matrix = Vector<Vector<T> >; | ||||||
|  |  | ||||||
|  | template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); } | ||||||
|  |  | ||||||
|  | template<class T> void Resize(Matrix<T > & mat, int N, int M) {  | ||||||
|  |   mat.resize(N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     mat[i].resize(M); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | template<class T> void Size(Vector<T> & vec, int &N)  | ||||||
|  | {  | ||||||
|  |   N= vec.size(); | ||||||
|  | } | ||||||
|  | template<class T> void Size(Matrix<T> & mat, int &N,int &M)  | ||||||
|  | {  | ||||||
|  |   N= mat.size(); | ||||||
|  |   M= mat[0].size(); | ||||||
|  | } | ||||||
|  | template<class T> void SizeSquare(Matrix<T> & mat, int &N)  | ||||||
|  | {  | ||||||
|  |   int M; Size(mat,N,M); | ||||||
|  |   assert(N==M); | ||||||
|  | } | ||||||
|  | template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)  | ||||||
|  | {  | ||||||
|  |   int N2,M2; | ||||||
|  |   Size(mat1,N1,M1); | ||||||
|  |   Size(mat2,N2,M2); | ||||||
|  |   assert(N1==N2); | ||||||
|  |   assert(M1==M2); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | //***************************************** | ||||||
|  | //*	(Complex) Vector operations	* | ||||||
|  | //***************************************** | ||||||
|  |  | ||||||
|  | /**Conj of a Vector **/ | ||||||
|  | template <class T> Vector<T> conj(Vector<T> p){ | ||||||
|  | 	Vector<T> q(p.size()); | ||||||
|  | 	for(int i=0;i<p.size();i++){q[i] = conj(p[i]);} | ||||||
|  | 	return q; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Norm of a Vector**/ | ||||||
|  | template <class T> T norm(Vector<T> p){ | ||||||
|  | 	T sum = 0; | ||||||
|  | 	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);} | ||||||
|  | 	return abs(sqrt(sum)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Norm squared of a Vector **/ | ||||||
|  | template <class T> T norm2(Vector<T> p){ | ||||||
|  | 	T sum = 0; | ||||||
|  | 	for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);} | ||||||
|  | 	return abs((sum)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Sum elements of a Vector **/ | ||||||
|  | template <class T> T trace(Vector<T> p){ | ||||||
|  | 	T sum = 0; | ||||||
|  | 	for(int i=0;i<p.size();i++){sum = sum + p[i];} | ||||||
|  | 	return sum; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Fill a Vector with constant c **/ | ||||||
|  | template <class T> void Fill(Vector<T> &p, T c){ | ||||||
|  | 	for(int i=0;i<p.size();i++){p[i] = c;} | ||||||
|  | } | ||||||
|  | /** Normalize a Vector **/ | ||||||
|  | template <class T> void normalize(Vector<T> &p){ | ||||||
|  | 	T m = norm(p); | ||||||
|  | 	if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;} | ||||||
|  | } | ||||||
|  | /** Vector by scalar **/ | ||||||
|  | template <class T, class U> Vector<T> times(Vector<T> p, U s){ | ||||||
|  | 	for(int i=0;i<p.size();i++){p[i] *= s;} | ||||||
|  | 	return p; | ||||||
|  | } | ||||||
|  | template <class T, class U> Vector<T> times(U s, Vector<T> p){ | ||||||
|  | 	for(int i=0;i<p.size();i++){p[i] *= s;} | ||||||
|  | 	return p; | ||||||
|  | } | ||||||
|  | /** inner product of a and b = conj(a) . b **/ | ||||||
|  | template <class T> T inner(Vector<T> a, Vector<T> b){ | ||||||
|  | 	T m = 0.; | ||||||
|  | 	for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];} | ||||||
|  | 	return m; | ||||||
|  | } | ||||||
|  | /** sum of a and b = a + b **/ | ||||||
|  | template <class T> Vector<T> add(Vector<T> a, Vector<T> b){ | ||||||
|  | 	Vector<T> m(a.size()); | ||||||
|  | 	for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];} | ||||||
|  | 	return m; | ||||||
|  | } | ||||||
|  | /** sum of a and b = a - b **/ | ||||||
|  | template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){ | ||||||
|  | 	Vector<T> m(a.size()); | ||||||
|  | 	for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];} | ||||||
|  | 	return m; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**  | ||||||
|  |  ********************************* | ||||||
|  |  *	Matrices	         * | ||||||
|  |  ********************************* | ||||||
|  |  **/ | ||||||
|  |  | ||||||
|  | template<class T> void Fill(Matrix<T> & mat, T&val) {  | ||||||
|  |   int N,M; | ||||||
|  |   Size(mat,N,M); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |   for(int j=0;j<M;j++){ | ||||||
|  |     mat[i][j] = val; | ||||||
|  |   }} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Transpose of a matrix **/ | ||||||
|  | Matrix<T> Transpose(Matrix<T> & mat){ | ||||||
|  |   int N,M; | ||||||
|  |   Size(mat,N,M); | ||||||
|  |   Matrix C; Resize(C,M,N); | ||||||
|  |   for(int i=0;i<M;i++){ | ||||||
|  |   for(int j=0;j<N;j++){ | ||||||
|  |     C[i][j] = mat[j][i]; | ||||||
|  |   }}  | ||||||
|  |   return C; | ||||||
|  | } | ||||||
|  | /** Set Matrix to unit matrix **/ | ||||||
|  | template<class T> void Unity(Matrix<T> &mat){ | ||||||
|  |   int N;  SizeSquare(mat,N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     for(int j=0;j<N;j++){ | ||||||
|  |       if ( i==j ) A[i][j] = 1; | ||||||
|  |       else        A[i][j] = 0; | ||||||
|  |     }  | ||||||
|  |   }  | ||||||
|  | } | ||||||
|  | /** Add C * I to matrix **/ | ||||||
|  | template<class T> | ||||||
|  | void PlusUnit(Matrix<T> & A,T c){ | ||||||
|  |   int dim;  SizeSquare(A,dim); | ||||||
|  |   for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** return the Hermitian conjugate of matrix **/ | ||||||
|  | Matrix<T> HermitianConj(Matrix<T> &mat){ | ||||||
|  |  | ||||||
|  |   int dim; SizeSquare(mat,dim); | ||||||
|  |  | ||||||
|  |   Matrix<T> C; Resize(C,dim,dim); | ||||||
|  |  | ||||||
|  |   for(int i=0;i<dim;i++){ | ||||||
|  |     for(int j=0;j<dim;j++){ | ||||||
|  |       C[i][j] = conj(mat[j][i]); | ||||||
|  |     }  | ||||||
|  |   }  | ||||||
|  |   return C; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** return diagonal entries as a Vector **/ | ||||||
|  | Vector<T> diag(Matrix<T> &A) | ||||||
|  | { | ||||||
|  |   int dim; SizeSquare(A,dim); | ||||||
|  |   Vector<T> d; Resize(d,dim); | ||||||
|  |  | ||||||
|  |   for(int i=0;i<dim;i++){ | ||||||
|  |     d[i] = A[i][i]; | ||||||
|  |   } | ||||||
|  |   return d; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Left multiply by a Vector **/ | ||||||
|  | Vector<T> operator *(Vector<T> &B,Matrix<T> &A) | ||||||
|  | { | ||||||
|  |   int K,M,N;  | ||||||
|  |   Size(B,K); | ||||||
|  |   Size(A,M,N); | ||||||
|  |   assert(K==M); | ||||||
|  |    | ||||||
|  |   Vector<T> C; Resize(C,N); | ||||||
|  |  | ||||||
|  |   for(int j=0;j<N;j++){ | ||||||
|  |     T sum = 0.0; | ||||||
|  |     for(int i=0;i<M;i++){ | ||||||
|  |       sum += B[i] * A[i][j]; | ||||||
|  |     } | ||||||
|  |     C[j] =  sum; | ||||||
|  |   } | ||||||
|  |   return C;  | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** return 1/diagonal entries as a Vector **/ | ||||||
|  | Vector<T> inv_diag(Matrix<T> & A){ | ||||||
|  |   int dim; SizeSquare(A,dim); | ||||||
|  |   Vector<T> d; Resize(d,dim); | ||||||
|  |   for(int i=0;i<dim;i++){ | ||||||
|  |     d[i] = 1.0/A[i][i]; | ||||||
|  |   } | ||||||
|  |   return d; | ||||||
|  | } | ||||||
|  | /** Matrix Addition **/ | ||||||
|  | inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B) | ||||||
|  | { | ||||||
|  |   int N,M  ; SizeSame(A,B,N,M); | ||||||
|  |   Matrix C; Resize(C,N,M); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     for(int j=0;j<M;j++){ | ||||||
|  |       C[i][j] = A[i][j] +  B[i][j]; | ||||||
|  |     }  | ||||||
|  |   }  | ||||||
|  |   return C; | ||||||
|  | }  | ||||||
|  | /** Matrix Subtraction **/ | ||||||
|  | inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){ | ||||||
|  |   int N,M  ; SizeSame(A,B,N,M); | ||||||
|  |   Matrix C; Resize(C,N,M); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |   for(int j=0;j<M;j++){ | ||||||
|  |     C[i][j] = A[i][j] -  B[i][j]; | ||||||
|  |   }} | ||||||
|  |   return C; | ||||||
|  | }  | ||||||
|  |  | ||||||
|  | /** Matrix scalar multiplication **/ | ||||||
|  | inline Matrix<T> operator* (Matrix<T> & A,T c){ | ||||||
|  |   int N,M; Size(A,N,M); | ||||||
|  |   Matrix C; Resize(C,N,M); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |   for(int j=0;j<M;j++){ | ||||||
|  |     C[i][j] = A[i][j]*c; | ||||||
|  |   }}  | ||||||
|  |   return C; | ||||||
|  | }  | ||||||
|  | /** Matrix Matrix multiplication **/ | ||||||
|  | inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){ | ||||||
|  |   int K,L,N,M; | ||||||
|  |   Size(A,K,L); | ||||||
|  |   Size(B,N,M); assert(L==N); | ||||||
|  |   Matrix C; Resize(C,K,M); | ||||||
|  |  | ||||||
|  |   for(int i=0;i<K;i++){ | ||||||
|  |     for(int j=0;j<M;j++){ | ||||||
|  |       T sum = 0.0; | ||||||
|  |       for(int k=0;k<N;k++) sum += A[i][k]*B[k][j]; | ||||||
|  |       C[i][j] =sum; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   return C;  | ||||||
|  | }  | ||||||
|  | /** Matrix Vector multiplication **/ | ||||||
|  | inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){ | ||||||
|  |   int M,N,K; | ||||||
|  |   Size(A,N,M); | ||||||
|  |   Size(B,K); assert(K==M); | ||||||
|  |   Vector<T> C; Resize(C,N); | ||||||
|  |   for(int i=0;i<N;i++){ | ||||||
|  |     T sum = 0.0; | ||||||
|  |     for(int j=0;j<M;j++) sum += A[i][j]*B[j]; | ||||||
|  |     C[i] =  sum; | ||||||
|  |   } | ||||||
|  |   return C;  | ||||||
|  | }  | ||||||
|  |  | ||||||
|  | /** Some version of Matrix norm **/ | ||||||
|  | /* | ||||||
|  | inline T Norm(){ // this is not a usual L2 norm | ||||||
|  |     T norm = 0; | ||||||
|  |     for(int i=0;i<dim;i++){ | ||||||
|  |       for(int j=0;j<dim;j++){ | ||||||
|  | 	norm += abs(A[i][j]); | ||||||
|  |     }} | ||||||
|  |     return norm; | ||||||
|  |   } | ||||||
|  | */ | ||||||
|  |  | ||||||
|  | /** Some version of Matrix norm **/ | ||||||
|  | template<class T> T LargestDiag(Matrix<T> &A) | ||||||
|  | { | ||||||
|  |   int dim ; SizeSquare(A,dim);  | ||||||
|  |  | ||||||
|  |   T ld = abs(A[0][0]); | ||||||
|  |   for(int i=1;i<dim;i++){ | ||||||
|  |     T cf = abs(A[i][i]); | ||||||
|  |     if(abs(cf) > abs(ld) ){ld = cf;} | ||||||
|  |   } | ||||||
|  |   return ld; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Look for entries on the leading subdiagonal that are smaller than 'small' **/ | ||||||
|  | template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small) | ||||||
|  | { | ||||||
|  |   int dim; SizeSquare(A,dim); | ||||||
|  |   for(int l = dim - 1 - offset; l >= 1; l--) {             		 | ||||||
|  |     if((U)abs(A[l][l - 1]) < (U)small) { | ||||||
|  |       A[l][l-1]=(U)0.0; | ||||||
|  |       return l; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** Look for entries on the leading subdiagonal that are smaller than 'small' **/ | ||||||
|  | template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)  | ||||||
|  | { | ||||||
|  |   int dim; SizeSquare(A,dim); | ||||||
|  |   for(int l = dim - 1 - offset; l >= 1; l--) { | ||||||
|  |     if((U)abs(A[l][l - 1]) < (U)small) { | ||||||
|  |       A[l][l - 1] = (U)0.0; | ||||||
|  |       A[l - 1][l] = (U)0.0; | ||||||
|  |       return l; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   return 0; | ||||||
|  | } | ||||||
|  | /**Assign a submatrix to a larger one**/ | ||||||
|  | template<class T> | ||||||
|  | void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S) | ||||||
|  | { | ||||||
|  |   for(int i = row_st; i<row_end; i++){ | ||||||
|  |     for(int j = col_st; j<col_end; j++){ | ||||||
|  |       A[i][j] = S[i - row_st][j - col_st]; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /**Get a square submatrix**/ | ||||||
|  | template <class T> | ||||||
|  | Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end) | ||||||
|  | { | ||||||
|  |   Matrix<T> H; Resize(row_end - row_st,col_end-col_st); | ||||||
|  |  | ||||||
|  |   for(int i = row_st; i<row_end; i++){ | ||||||
|  |   for(int j = col_st; j<col_end; j++){ | ||||||
|  |     H[i-row_st][j-col_st]=A[i][j]; | ||||||
|  |   }} | ||||||
|  |   return H; | ||||||
|  | } | ||||||
|  |    | ||||||
|  |  /**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matricies they represent**/ | ||||||
|  | template<class T> | ||||||
|  | void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S) | ||||||
|  | { | ||||||
|  |   for(int i = row_st; i<row_end; i++){ | ||||||
|  |   for(int j = col_st; j<col_end; j++){ | ||||||
|  |     A[i][j] = S[i - row_st][j - col_st]; | ||||||
|  |   }} | ||||||
|  | } | ||||||
|  |    | ||||||
|  | /** compute b_i A_ij b_j **/ // surprised no Conj | ||||||
|  | template<class T> T proj(Matrix<T> A, Vector<T> B){ | ||||||
|  |   int dim; SizeSquare(A,dim); | ||||||
|  |   int dimB; Size(B,dimB); | ||||||
|  |   assert(dimB==dim); | ||||||
|  |   T C = 0; | ||||||
|  |   for(int i=0;i<dim;i++){ | ||||||
|  |     T sum = 0.0; | ||||||
|  |     for(int j=0;j<dim;j++){ | ||||||
|  |       sum += A[i][j]*B[j]; | ||||||
|  |     } | ||||||
|  |     C +=  B[i]*sum; // No conj? | ||||||
|  |   } | ||||||
|  |   return C;  | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  ************************************************************* | ||||||
|  |  * | ||||||
|  |  * Matrix Vector products | ||||||
|  |  * | ||||||
|  |  ************************************************************* | ||||||
|  |  */ | ||||||
|  | // Instead make a linop and call my CG; | ||||||
|  |  | ||||||
|  | /// q -> q Q | ||||||
|  | template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q) | ||||||
|  | { | ||||||
|  |   int M; SizeSquare(Q,M); | ||||||
|  |   int N; Size(q,N);  | ||||||
|  |   assert(M==N); | ||||||
|  |  | ||||||
|  |   times(q,Q,N); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// q -> q Q | ||||||
|  | template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N) | ||||||
|  | { | ||||||
|  |   GridBase *grid = q[0]._grid; | ||||||
|  |   int M; SizeSquare(Q,M); | ||||||
|  |   int K; Size(q,K);  | ||||||
|  |   assert(N<M); | ||||||
|  |   assert(N<K); | ||||||
|  |   Vector<Fermion> S(N,grid ); | ||||||
|  |   for(int j=0;j<N;j++){ | ||||||
|  |     S[j] = zero; | ||||||
|  |     for(int k=0;k<N;k++){ | ||||||
|  |       S[j] = S[j] +  q[k]* Q[k][j];  | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   for(int j=0;j<q.size();j++){ | ||||||
|  |     q[j] = S[j]; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | #endif | ||||||
							
								
								
									
										75
									
								
								lib/algorithms/iterative/MatrixUtils.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								lib/algorithms/iterative/MatrixUtils.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/algorithms/iterative/MatrixUtils.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #ifndef GRID_MATRIX_UTILS_H | ||||||
|  | #define GRID_MATRIX_UTILS_H | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  |   namespace MatrixUtils {  | ||||||
|  |  | ||||||
|  |     template<class T> inline void Size(Matrix<T>& A,int &N,int &M){ | ||||||
|  |       N=A.size(); assert(N>0); | ||||||
|  |       M=A[0].size(); | ||||||
|  |       for(int i=0;i<N;i++){ | ||||||
|  | 	assert(A[i].size()==M); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     template<class T> inline void SizeSquare(Matrix<T>& A,int &N) | ||||||
|  |     { | ||||||
|  |       int M; | ||||||
|  |       Size(A,N,M); | ||||||
|  |       assert(N==M); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     template<class T> inline void Fill(Matrix<T>& A,T & val) | ||||||
|  |     {  | ||||||
|  |       int N,M; | ||||||
|  |       Size(A,N,M); | ||||||
|  |       for(int i=0;i<N;i++){ | ||||||
|  |       for(int j=0;j<M;j++){ | ||||||
|  | 	A[i][j]=val; | ||||||
|  |       }} | ||||||
|  |     } | ||||||
|  |     template<class T> inline void Diagonal(Matrix<T>& A,T & val) | ||||||
|  |     {  | ||||||
|  |       int N; | ||||||
|  |       SizeSquare(A,N); | ||||||
|  |       for(int i=0;i<N;i++){ | ||||||
|  | 	A[i][i]=val; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     template<class T> inline void Identity(Matrix<T>& A) | ||||||
|  |     { | ||||||
|  |       Fill(A,0.0); | ||||||
|  |       Diagonal(A,1.0); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   }; | ||||||
|  | } | ||||||
|  | #endif | ||||||
| @@ -141,5 +141,85 @@ namespace Grid { | |||||||
|     }      |     }      | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Take a matrix and form a Red Black solver calling a Herm solver | ||||||
|  |   // Use of RB info prevents making SchurRedBlackSolve conform to standard interface | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   template<class Field> class SchurRedBlackDiagTwoSolve { | ||||||
|  |   private: | ||||||
|  |     OperatorFunction<Field> & _HermitianRBSolver; | ||||||
|  |     int CBfactorise; | ||||||
|  |   public: | ||||||
|  |  | ||||||
|  |     ///////////////////////////////////////////////////// | ||||||
|  |     // Wrap the usual normal equations Schur trick | ||||||
|  |     ///////////////////////////////////////////////////// | ||||||
|  |   SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver)  : | ||||||
|  |      _HermitianRBSolver(HermitianRBSolver)  | ||||||
|  |     {  | ||||||
|  |       CBfactorise=0; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     template<class Matrix> | ||||||
|  |       void operator() (Matrix & _Matrix,const Field &in, Field &out){ | ||||||
|  |  | ||||||
|  |       // FIXME CGdiagonalMee not implemented virtual function | ||||||
|  |       // FIXME use CBfactorise to control schur decomp | ||||||
|  |       GridBase *grid = _Matrix.RedBlackGrid(); | ||||||
|  |       GridBase *fgrid= _Matrix.Grid(); | ||||||
|  |  | ||||||
|  |       SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||||
|  |   | ||||||
|  |       Field src_e(grid); | ||||||
|  |       Field src_o(grid); | ||||||
|  |       Field sol_e(grid); | ||||||
|  |       Field sol_o(grid); | ||||||
|  |       Field   tmp(grid); | ||||||
|  |       Field  Mtmp(grid); | ||||||
|  |       Field resid(fgrid); | ||||||
|  |  | ||||||
|  |       pickCheckerboard(Even,src_e,in); | ||||||
|  |       pickCheckerboard(Odd ,src_o,in); | ||||||
|  |       pickCheckerboard(Even,sol_e,out); | ||||||
|  |       pickCheckerboard(Odd ,sol_o,out); | ||||||
|  |      | ||||||
|  |       ///////////////////////////////////////////////////// | ||||||
|  |       // src_o = Mdag * (source_o - Moe MeeInv source_e) | ||||||
|  |       ///////////////////////////////////////////////////// | ||||||
|  |       _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even); | ||||||
|  |       _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);      | ||||||
|  |       tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);      | ||||||
|  |  | ||||||
|  |       // get the right MpcDag | ||||||
|  |       _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);        | ||||||
|  |  | ||||||
|  |       ////////////////////////////////////////////////////////////// | ||||||
|  |       // Call the red-black solver | ||||||
|  |       ////////////////////////////////////////////////////////////// | ||||||
|  |       std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl; | ||||||
|  | //      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd); | ||||||
|  |       _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd); | ||||||
|  |       _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd); | ||||||
|  |  | ||||||
|  |       /////////////////////////////////////////////////// | ||||||
|  |       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... | ||||||
|  |       /////////////////////////////////////////////////// | ||||||
|  |       _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even); | ||||||
|  |       src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even); | ||||||
|  |       _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even); | ||||||
|  |       | ||||||
|  |       setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even); | ||||||
|  |       setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd ); | ||||||
|  |  | ||||||
|  |       // Verify the unprec residual | ||||||
|  |       _Matrix.M(out,resid);  | ||||||
|  |       resid = resid-in; | ||||||
|  |       RealD ns = norm2(in); | ||||||
|  |       RealD nr = norm2(resid); | ||||||
|  |  | ||||||
|  |       std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl; | ||||||
|  |     }      | ||||||
|  |   }; | ||||||
|  |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|   | |||||||
							
								
								
									
										15
									
								
								lib/algorithms/iterative/TODO
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								lib/algorithms/iterative/TODO
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | |||||||
|  | - ConjugateGradientMultiShift | ||||||
|  | - MCR | ||||||
|  |  | ||||||
|  | - Potentially Useful Boost libraries | ||||||
|  |  | ||||||
|  | - MultiArray | ||||||
|  | - Aligned allocator; memory pool | ||||||
|  | - Remez -- Mike or Boost? | ||||||
|  | - Multiprecision | ||||||
|  | - quaternians | ||||||
|  | - Tokenize | ||||||
|  | - Serialization | ||||||
|  | - Regex | ||||||
|  | - Proto (ET) | ||||||
|  | - uBlas | ||||||
							
								
								
									
										122
									
								
								lib/algorithms/iterative/bisec.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								lib/algorithms/iterative/bisec.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | |||||||
|  | #include <math.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
|  | struct Bisection { | ||||||
|  |  | ||||||
|  | static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig) | ||||||
|  | { | ||||||
|  |   int i,j; | ||||||
|  |   std::vector<RealD> evec1(row_num+3); | ||||||
|  |   std::vector<RealD> evec2(row_num+3); | ||||||
|  |   RealD eps2; | ||||||
|  |   ALPHA[1]=0.; | ||||||
|  |   BETHA[1]=0.; | ||||||
|  |   for(i=0;i<row_num-1;i++) { | ||||||
|  |     ALPHA[i+1] = A[i*(row_num+1)].real(); | ||||||
|  |     BETHA[i+2] = A[i*(row_num+1)+1].real(); | ||||||
|  |   } | ||||||
|  |   ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real(); | ||||||
|  |   bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2); | ||||||
|  |   bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2); | ||||||
|  |  | ||||||
|  |   // Do we really need to sort here? | ||||||
|  |   int begin=1; | ||||||
|  |   int end = row_num; | ||||||
|  |   int swapped=1; | ||||||
|  |   while(swapped) { | ||||||
|  |     swapped=0; | ||||||
|  |     for(i=begin;i<end;i++){ | ||||||
|  |       if(mag(evec2[i])>mag(evec2[i+1]))	{ | ||||||
|  | 	swap(evec2+i,evec2+i+1); | ||||||
|  | 	swapped=1; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     end--; | ||||||
|  |     for(i=end-1;i>=begin;i--){ | ||||||
|  |       if(mag(evec2[i])>mag(evec2[i+1]))	{ | ||||||
|  | 	swap(evec2+i,evec2+i+1); | ||||||
|  | 	swapped=1; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     begin++; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   for(i=0;i<row_num;i++){ | ||||||
|  |     for(j=0;j<row_num;j++) { | ||||||
|  |       if(i==j) H[i*row_num+j]=evec2[i+1]; | ||||||
|  |       else H[i*row_num+j]=0.; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | static void bisec(std::vector<RealD> &c,    | ||||||
|  | 		  std::vector<RealD> &b, | ||||||
|  | 		  int n, | ||||||
|  | 		  int m1, | ||||||
|  | 		  int m2, | ||||||
|  | 		  RealD eps1, | ||||||
|  | 		  RealD relfeh, | ||||||
|  | 		  std::vector<RealD> &x, | ||||||
|  | 		  RealD &eps2) | ||||||
|  | { | ||||||
|  |   std::vector<RealD> wu(n+2); | ||||||
|  |  | ||||||
|  |   RealD h,q,x1,xu,x0,xmin,xmax;  | ||||||
|  |   int i,a,k; | ||||||
|  |  | ||||||
|  |   b[1]=0.0; | ||||||
|  |   xmin=c[n]-fabs(b[n]); | ||||||
|  |   xmax=c[n]+fabs(b[n]); | ||||||
|  |   for(i=1;i<n;i++){ | ||||||
|  |     h=fabs(b[i])+fabs(b[i+1]); | ||||||
|  |     if(c[i]+h>xmax) xmax= c[i]+h; | ||||||
|  |     if(c[i]-h<xmin) xmin= c[i]-h; | ||||||
|  |   } | ||||||
|  |   xmax *=2.; | ||||||
|  |  | ||||||
|  |   eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin); | ||||||
|  |   if(eps1<=0.0) eps1=eps2; | ||||||
|  |   eps2=0.5*eps1+7.0*(eps2); | ||||||
|  |   x0=xmax; | ||||||
|  |   for(i=m1;i<=m2;i++){ | ||||||
|  |     x[i]=xmax; | ||||||
|  |     wu[i]=xmin; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   for(k=m2;k>=m1;k--){ | ||||||
|  |     xu=xmin; | ||||||
|  |     i=k; | ||||||
|  |     do{ | ||||||
|  |       if(xu<wu[i]){ | ||||||
|  | 	xu=wu[i]; | ||||||
|  | 	i=m1-1; | ||||||
|  |       } | ||||||
|  |       i--; | ||||||
|  |     }while(i>=m1); | ||||||
|  |     if(x0>x[k]) x0=x[k]; | ||||||
|  |     while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){ | ||||||
|  |       x1=(xu+x0)/2; | ||||||
|  |  | ||||||
|  |       a=0; | ||||||
|  |       q=1.0; | ||||||
|  |       for(i=1;i<=n;i++){ | ||||||
|  | 	q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh); | ||||||
|  | 	if(q<0) a++; | ||||||
|  |       } | ||||||
|  |       //			printf("x1=%e a=%d\n",x1,a); | ||||||
|  |       if(a<k){ | ||||||
|  | 	if(a<m1){ | ||||||
|  | 	  xu=x1; | ||||||
|  | 	  wu[m1]=x1; | ||||||
|  | 	}else { | ||||||
|  | 	  xu=x1; | ||||||
|  | 	  wu[a+1]=x1; | ||||||
|  | 	  if(x[a]>x1) x[a]=x1; | ||||||
|  | 	} | ||||||
|  |       }else x0=x1; | ||||||
|  |     } | ||||||
|  |     x[k]=(x0+xu)/2; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | } | ||||||
							
								
								
									
										1
									
								
								lib/algorithms/iterative/get_eig.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								lib/algorithms/iterative/get_eig.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  |  | ||||||
							
								
								
									
										0
									
								
								lib/communicator/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/communicator/.dirstamp
									
									
									
									
									
										Normal file
									
								
							| @@ -30,8 +30,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifndef GRID_LATTICE_REDUCTION_H | #ifndef GRID_LATTICE_REDUCTION_H | ||||||
| #define GRID_LATTICE_REDUCTION_H | #define GRID_LATTICE_REDUCTION_H | ||||||
|  |  | ||||||
| #include <Grid/Eigen/Dense> |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| #ifdef GRID_WARN_SUBOPTIMAL | #ifdef GRID_WARN_SUBOPTIMAL | ||||||
| #warning "Optimisation alert all these reduction loops are NOT threaded " | #warning "Optimisation alert all these reduction loops are NOT threaded " | ||||||
| @@ -45,25 +43,27 @@ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ | |||||||
|     return std::real(nrm);  |     return std::real(nrm);  | ||||||
|   } |   } | ||||||
|  |  | ||||||
| // Double inner product |  | ||||||
|     template<class vobj> |     template<class vobj> | ||||||
|     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)  |     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)  | ||||||
|     { |     { | ||||||
|       typedef typename vobj::scalar_type scalar_type; |       typedef typename vobj::scalar_type scalar_type; | ||||||
|   typedef typename vobj::vector_typeD vector_type; |       typedef typename vobj::vector_type vector_type; | ||||||
|       scalar_type  nrm; |       scalar_type  nrm; | ||||||
|  |  | ||||||
|       GridBase *grid = left._grid; |       GridBase *grid = left._grid; | ||||||
|  |  | ||||||
|       std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()); |       std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()); | ||||||
|  |       for(int i=0;i<grid->SumArraySize();i++){ | ||||||
|  | 	sumarray[i]=zero; | ||||||
|  |       } | ||||||
|  |  | ||||||
|       parallel_for(int thr=0;thr<grid->SumArraySize();thr++){ |       parallel_for(int thr=0;thr<grid->SumArraySize();thr++){ | ||||||
| 	int nwork, mywork, myoff; | 	int nwork, mywork, myoff; | ||||||
| 	GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); | 	GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); | ||||||
| 	 | 	 | ||||||
|     decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation | 	decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation | ||||||
|         for(int ss=myoff;ss<mywork+myoff; ss++){ |         for(int ss=myoff;ss<mywork+myoff; ss++){ | ||||||
|       vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]); | 	  vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]); | ||||||
| 	} | 	} | ||||||
| 	sumarray[thr]=TensorRemove(vnrm) ; | 	sumarray[thr]=TensorRemove(vnrm) ; | ||||||
|       } |       } | ||||||
| @@ -103,8 +103,8 @@ inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     template<class vobj> |     template<class vobj> | ||||||
| inline typename vobj::scalar_object sum(const Lattice<vobj> &arg) |     inline typename vobj::scalar_object sum(const Lattice<vobj> &arg){ | ||||||
| { |  | ||||||
|       GridBase *grid=arg._grid; |       GridBase *grid=arg._grid; | ||||||
|       int Nsimd = grid->Nsimd(); |       int Nsimd = grid->Nsimd(); | ||||||
|  |  | ||||||
| @@ -142,21 +142,16 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg) | |||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc... |  | ||||||
| ////////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) | template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) | ||||||
| { | { | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
|   // FIXME precision promoted summation |  | ||||||
|   // may be important for correlation functions |  | ||||||
|   // But easily avoided by using double precision fields |  | ||||||
|   /////////////////////////////////////////////////////// |  | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|   GridBase  *grid = Data._grid; |   GridBase  *grid = Data._grid; | ||||||
|   assert(grid!=NULL); |   assert(grid!=NULL); | ||||||
|  |  | ||||||
|  |   // FIXME | ||||||
|  |   // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl; | ||||||
|  |  | ||||||
|   const int    Nd = grid->_ndimension; |   const int    Nd = grid->_ndimension; | ||||||
|   const int Nsimd = grid->Nsimd(); |   const int Nsimd = grid->Nsimd(); | ||||||
|  |  | ||||||
| @@ -171,28 +166,20 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|   std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars |   std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars | ||||||
|   std::vector<sobj> extracted(Nsimd);     // splitting the SIMD |   std::vector<sobj> extracted(Nsimd);     // splitting the SIMD | ||||||
|  |  | ||||||
|   result.resize(fd); // And then global sum to return the same vector to every node  |   result.resize(fd); // And then global sum to return the same vector to every node for IO to file | ||||||
|   for(int r=0;r<rd;r++){ |   for(int r=0;r<rd;r++){ | ||||||
|     lvSum[r]=zero; |     lvSum[r]=zero; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   int e1=    grid->_slice_nblock[orthogdim]; |   std::vector<int>  coor(Nd);   | ||||||
|   int e2=    grid->_slice_block [orthogdim]; |  | ||||||
|   int stride=grid->_slice_stride[orthogdim]; |  | ||||||
|  |  | ||||||
|   // sum over reduced dimension planes, breaking out orthog dir |   // sum over reduced dimension planes, breaking out orthog dir | ||||||
|   // Parallel over orthog direction |  | ||||||
|   parallel_for(int r=0;r<rd;r++){ |  | ||||||
|  |  | ||||||
|     int so=r*grid->_ostride[orthogdim]; // base offset for start of plane  |   for(int ss=0;ss<grid->oSites();ss++){ | ||||||
|  |     Lexicographic::CoorFromIndex(coor,ss,grid->_rdimensions); | ||||||
|     for(int n=0;n<e1;n++){ |     int r = coor[orthogdim]; | ||||||
|       for(int b=0;b<e2;b++){ |  | ||||||
| 	int ss= so+n*stride+b; |  | ||||||
|     lvSum[r]=lvSum[r]+Data._odata[ss]; |     lvSum[r]=lvSum[r]+Data._odata[ss]; | ||||||
|   }   |   }   | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Sum across simd lanes in the plane, breaking out orthog dir. |   // Sum across simd lanes in the plane, breaking out orthog dir. | ||||||
|   std::vector<int> icoor(Nd); |   std::vector<int> icoor(Nd); | ||||||
| @@ -227,304 +214,10 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|  |  | ||||||
|     result[t]=gsum; |     result[t]=gsum; | ||||||
|   } |   } | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)  |  | ||||||
| { |  | ||||||
|   typedef typename vobj::vector_type   vector_type; |  | ||||||
|   typedef typename vobj::scalar_type   scalar_type; |  | ||||||
|   GridBase  *grid = lhs._grid; |  | ||||||
|   assert(grid!=NULL); |  | ||||||
|   conformable(grid,rhs._grid); |  | ||||||
|  |  | ||||||
|   const int    Nd = grid->_ndimension; |  | ||||||
|   const int Nsimd = grid->Nsimd(); |  | ||||||
|  |  | ||||||
|   assert(orthogdim >= 0); |  | ||||||
|   assert(orthogdim < Nd); |  | ||||||
|  |  | ||||||
|   int fd=grid->_fdimensions[orthogdim]; |  | ||||||
|   int ld=grid->_ldimensions[orthogdim]; |  | ||||||
|   int rd=grid->_rdimensions[orthogdim]; |  | ||||||
|  |  | ||||||
|   std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first |  | ||||||
|   std::vector<scalar_type > lsSum(ld,scalar_type(0.0));                    // sum across these down to scalars |  | ||||||
|   std::vector<iScalar<scalar_type> > extracted(Nsimd);                  // splitting the SIMD |  | ||||||
|  |  | ||||||
|   result.resize(fd); // And then global sum to return the same vector to every node for IO to file |  | ||||||
|   for(int r=0;r<rd;r++){ |  | ||||||
|     lvSum[r]=zero; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   int e1=    grid->_slice_nblock[orthogdim]; |  | ||||||
|   int e2=    grid->_slice_block [orthogdim]; |  | ||||||
|   int stride=grid->_slice_stride[orthogdim]; |  | ||||||
|  |  | ||||||
|   parallel_for(int r=0;r<rd;r++){ |  | ||||||
|  |  | ||||||
|     int so=r*grid->_ostride[orthogdim]; // base offset for start of plane  |  | ||||||
|  |  | ||||||
|     for(int n=0;n<e1;n++){ |  | ||||||
|       for(int b=0;b<e2;b++){ |  | ||||||
| 	int ss= so+n*stride+b; |  | ||||||
| 	vector_type vv = TensorRemove(innerProduct(lhs._odata[ss],rhs._odata[ss])); |  | ||||||
| 	lvSum[r]=lvSum[r]+vv; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Sum across simd lanes in the plane, breaking out orthog dir. |  | ||||||
|   std::vector<int> icoor(Nd); |  | ||||||
|   for(int rt=0;rt<rd;rt++){ |  | ||||||
|  |  | ||||||
|     iScalar<vector_type> temp;  |  | ||||||
|     temp._internal = lvSum[rt]; |  | ||||||
|     extract(temp,extracted); |  | ||||||
|  |  | ||||||
|     for(int idx=0;idx<Nsimd;idx++){ |  | ||||||
|  |  | ||||||
|       grid->iCoorFromIindex(icoor,idx); |  | ||||||
|  |  | ||||||
|       int ldx =rt+icoor[orthogdim]*rd; |  | ||||||
|  |  | ||||||
|       lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal; |  | ||||||
|  |  | ||||||
| } | } | ||||||
|   } |  | ||||||
|    |  | ||||||
|   // sum over nodes. |  | ||||||
|   scalar_type gsum; |  | ||||||
|   for(int t=0;t<fd;t++){ |  | ||||||
|     int pt = t/ld; // processor plane |  | ||||||
|     int lt = t%ld; |  | ||||||
|     if ( pt == grid->_processor_coor[orthogdim] ) { |  | ||||||
|       gsum=lsSum[lt]; |  | ||||||
|     } else { |  | ||||||
|       gsum=scalar_type(0.0); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     grid->GlobalSum(gsum); |  | ||||||
|  |  | ||||||
|     result[t]=gsum; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| template<class vobj> |  | ||||||
| static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)  |  | ||||||
| { |  | ||||||
|   typedef typename vobj::scalar_object sobj; |  | ||||||
|   typedef typename vobj::scalar_type scalar_type; |  | ||||||
|   typedef typename vobj::vector_type vector_type; |  | ||||||
|    |  | ||||||
|   int Nblock = rhs._grid->GlobalDimensions()[Orthog]; |  | ||||||
|   std::vector<ComplexD> ip(Nblock); |  | ||||||
|   sn.resize(Nblock); |  | ||||||
|    |  | ||||||
|   sliceInnerProductVector(ip,rhs,rhs,Orthog); |  | ||||||
|   for(int ss=0;ss<Nblock;ss++){ |  | ||||||
|     sn[ss] = real(ip[ss]); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y, |  | ||||||
| 			    int orthogdim,RealD scale=1.0)  |  | ||||||
| {     |  | ||||||
|   typedef typename vobj::scalar_object sobj; |  | ||||||
|   typedef typename vobj::scalar_type scalar_type; |  | ||||||
|   typedef typename vobj::vector_type vector_type; |  | ||||||
|   typedef typename vobj::tensor_reduced tensor_reduced; |  | ||||||
|    |  | ||||||
|   GridBase *grid  = X._grid; |  | ||||||
|  |  | ||||||
|   int Nsimd  =grid->Nsimd(); |  | ||||||
|   int Nblock =grid->GlobalDimensions()[orthogdim]; |  | ||||||
|  |  | ||||||
|   int fd     =grid->_fdimensions[orthogdim]; |  | ||||||
|   int ld     =grid->_ldimensions[orthogdim]; |  | ||||||
|   int rd     =grid->_rdimensions[orthogdim]; |  | ||||||
|  |  | ||||||
|   int e1     =grid->_slice_nblock[orthogdim]; |  | ||||||
|   int e2     =grid->_slice_block [orthogdim]; |  | ||||||
|   int stride =grid->_slice_stride[orthogdim]; |  | ||||||
|  |  | ||||||
|   std::vector<int> icoor; |  | ||||||
|  |  | ||||||
|   for(int r=0;r<rd;r++){ |  | ||||||
|  |  | ||||||
|     int so=r*grid->_ostride[orthogdim]; // base offset for start of plane  |  | ||||||
|  |  | ||||||
|     vector_type    av; |  | ||||||
|  |  | ||||||
|     for(int l=0;l<Nsimd;l++){ |  | ||||||
|       grid->iCoorFromIindex(icoor,l); |  | ||||||
|       int ldx =r+icoor[orthogdim]*rd; |  | ||||||
|       scalar_type *as =(scalar_type *)&av; |  | ||||||
|       as[l] = scalar_type(a[ldx])*scale; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     tensor_reduced at; at=av; |  | ||||||
|  |  | ||||||
|     parallel_for_nest2(int n=0;n<e1;n++){ |  | ||||||
|       for(int b=0;b<e2;b++){ |  | ||||||
| 	int ss= so+n*stride+b; |  | ||||||
| 	R._odata[ss] = at*X._odata[ss]+Y._odata[ss]; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| /* |  | ||||||
| template<class vobj> |  | ||||||
| static void sliceMaddVectorSlow (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y, |  | ||||||
| 			     int Orthog,RealD scale=1.0)  |  | ||||||
| {     |  | ||||||
|   // FIXME: Implementation is slow |  | ||||||
|   // Best base the linear combination by constructing a  |  | ||||||
|   // set of vectors of size grid->_rdimensions[Orthog]. |  | ||||||
|   typedef typename vobj::scalar_object sobj; |  | ||||||
|   typedef typename vobj::scalar_type scalar_type; |  | ||||||
|   typedef typename vobj::vector_type vector_type; |  | ||||||
|    |  | ||||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; |  | ||||||
|    |  | ||||||
|   GridBase *FullGrid  = X._grid; |  | ||||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); |  | ||||||
|    |  | ||||||
|   Lattice<vobj> Xslice(SliceGrid); |  | ||||||
|   Lattice<vobj> Rslice(SliceGrid); |  | ||||||
|   // If we based this on Cshift it would work for spread out |  | ||||||
|   // but it would be even slower |  | ||||||
|   for(int i=0;i<Nblock;i++){ |  | ||||||
|     ExtractSlice(Rslice,Y,i,Orthog); |  | ||||||
|     ExtractSlice(Xslice,X,i,Orthog); |  | ||||||
|     Rslice = Rslice + Xslice*(scale*a[i]); |  | ||||||
|     InsertSlice(Rslice,R,i,Orthog); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| static void sliceInnerProductVectorSlow( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  |  | ||||||
|   { |  | ||||||
|     // FIXME: Implementation is slow |  | ||||||
|     // Look at localInnerProduct implementation, |  | ||||||
|     // and do inside a site loop with block strided iterators |  | ||||||
|     typedef typename vobj::scalar_object sobj; |  | ||||||
|     typedef typename vobj::scalar_type scalar_type; |  | ||||||
|     typedef typename vobj::vector_type vector_type; |  | ||||||
|     typedef typename vobj::tensor_reduced scalar; |  | ||||||
|     typedef typename scalar::scalar_object  scomplex; |  | ||||||
|    |  | ||||||
|     int Nblock = lhs._grid->GlobalDimensions()[Orthog]; |  | ||||||
|  |  | ||||||
|     vec.resize(Nblock); |  | ||||||
|     std::vector<scomplex> sip(Nblock); |  | ||||||
|     Lattice<scalar> IP(lhs._grid);  |  | ||||||
|  |  | ||||||
|     IP=localInnerProduct(lhs,rhs); |  | ||||||
|     sliceSum(IP,sip,Orthog); |  | ||||||
|    |  | ||||||
|     for(int ss=0;ss<Nblock;ss++){ |  | ||||||
|       vec[ss] = TensorRemove(sip[ss]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| */ |  | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
| // FIXME: Implementation is slow |  | ||||||
| // If we based this on Cshift it would work for spread out |  | ||||||
| // but it would be even slower |  | ||||||
| // |  | ||||||
| // Repeated extract slice is inefficient |  | ||||||
| // |  | ||||||
| // Best base the linear combination by constructing a  |  | ||||||
| // set of vectors of size grid->_rdimensions[Orthog]. |  | ||||||
| ////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
| inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) |  | ||||||
| { |  | ||||||
|   int NN    = BlockSolverGrid->_ndimension; |  | ||||||
|   int nsimd = BlockSolverGrid->Nsimd(); |  | ||||||
|    |  | ||||||
|   std::vector<int> latt_phys(0); |  | ||||||
|   std::vector<int> simd_phys(0); |  | ||||||
|   std::vector<int>  mpi_phys(0); |  | ||||||
|    |  | ||||||
|   for(int d=0;d<NN;d++){ |  | ||||||
|     if( d!=Orthog ) {  |  | ||||||
|       latt_phys.push_back(BlockSolverGrid->_fdimensions[d]); |  | ||||||
|       simd_phys.push_back(BlockSolverGrid->_simd_layout[d]); |  | ||||||
|       mpi_phys.push_back(BlockSolverGrid->_processors[d]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);  |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)  |  | ||||||
| {     |  | ||||||
|   typedef typename vobj::scalar_object sobj; |  | ||||||
|   typedef typename vobj::scalar_type scalar_type; |  | ||||||
|   typedef typename vobj::vector_type vector_type; |  | ||||||
|  |  | ||||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; |  | ||||||
|    |  | ||||||
|   GridBase *FullGrid  = X._grid; |  | ||||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); |  | ||||||
|    |  | ||||||
|   Lattice<vobj> Xslice(SliceGrid); |  | ||||||
|   Lattice<vobj> Rslice(SliceGrid); |  | ||||||
|    |  | ||||||
|   for(int i=0;i<Nblock;i++){ |  | ||||||
|     ExtractSlice(Rslice,Y,i,Orthog); |  | ||||||
|     for(int j=0;j<Nblock;j++){ |  | ||||||
|       ExtractSlice(Xslice,X,j,Orthog); |  | ||||||
|       Rslice = Rslice + Xslice*(scale*aa(j,i)); |  | ||||||
|     } |  | ||||||
|     InsertSlice(Rslice,R,i,Orthog); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| static void sliceInnerProductMatrix(  Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  |  | ||||||
| { |  | ||||||
|   // FIXME: Implementation is slow |  | ||||||
|   // Not sure of best solution.. think about it |  | ||||||
|   typedef typename vobj::scalar_object sobj; |  | ||||||
|   typedef typename vobj::scalar_type scalar_type; |  | ||||||
|   typedef typename vobj::vector_type vector_type; |  | ||||||
|    |  | ||||||
|   GridBase *FullGrid  = lhs._grid; |  | ||||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); |  | ||||||
|    |  | ||||||
|   int Nblock = FullGrid->GlobalDimensions()[Orthog]; |  | ||||||
|    |  | ||||||
|   Lattice<vobj> Lslice(SliceGrid); |  | ||||||
|   Lattice<vobj> Rslice(SliceGrid); |  | ||||||
|    |  | ||||||
|   mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); |  | ||||||
|    |  | ||||||
|   for(int i=0;i<Nblock;i++){ |  | ||||||
|     ExtractSlice(Lslice,lhs,i,Orthog); |  | ||||||
|     for(int j=0;j<Nblock;j++){ |  | ||||||
|       ExtractSlice(Rslice,rhs,j,Orthog); |  | ||||||
|       mat(i,j) = innerProduct(Lslice,Rslice); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| #undef FORCE_DIAG |  | ||||||
| #ifdef FORCE_DIAG |  | ||||||
|   for(int i=0;i<Nblock;i++){ |  | ||||||
|     for(int j=0;j<Nblock;j++){ |  | ||||||
|       if ( i != j ) mat(i,j)=0.0; |  | ||||||
|     } |  | ||||||
| } | } | ||||||
| #endif |  | ||||||
|   return; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| } /*END NAMESPACE GRID*/ |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   | |||||||
| @@ -359,7 +359,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out) | |||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog) | void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog) | ||||||
| { | { | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
| @@ -401,7 +401,7 @@ void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice | |||||||
| } | } | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice, int orthog) | void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, int orthog) | ||||||
| { | { | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
| @@ -444,7 +444,7 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic | |||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) | ||||||
| { | { | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -110,8 +110,8 @@ public: | |||||||
|   friend std::ostream& operator<< (std::ostream& stream, Logger& log){ |   friend std::ostream& operator<< (std::ostream& stream, Logger& log){ | ||||||
|  |  | ||||||
|     if ( log.active ) { |     if ( log.active ) { | ||||||
|       stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : "; |       stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : "; | ||||||
|       stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : "; |       stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : "; | ||||||
|       if ( log.timestamp ) { |       if ( log.timestamp ) { | ||||||
| 	StopWatch.Stop(); | 	StopWatch.Stop(); | ||||||
| 	GridTime now = StopWatch.Elapsed(); | 	GridTime now = StopWatch.Elapsed(); | ||||||
|   | |||||||
							
								
								
									
										0
									
								
								lib/qcd/action/fermion/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/qcd/action/fermion/.dirstamp
									
									
									
									
									
										Normal file
									
								
							| @@ -414,6 +414,8 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co | |||||||
|     omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code |     omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code | ||||||
|     bs[i] = 0.5*(bpc/omega[i] + bmc); |     bs[i] = 0.5*(bpc/omega[i] + bmc); | ||||||
|     cs[i] = 0.5*(bpc/omega[i] - bmc); |     cs[i] = 0.5*(bpc/omega[i] - bmc); | ||||||
|  |     std::cout<<GridLogMessage << "CayleyFermion5D "<<i<<" bs="<<bs[i]<<" cs="<<cs[i]<< std::endl; | ||||||
|  |  | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   //////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -58,7 +58,6 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | |||||||
| #include <Grid/qcd/action/fermion/DomainWallFermion.h> | #include <Grid/qcd/action/fermion/DomainWallFermion.h> | ||||||
| #include <Grid/qcd/action/fermion/MobiusFermion.h> | #include <Grid/qcd/action/fermion/MobiusFermion.h> | ||||||
| #include <Grid/qcd/action/fermion/ZMobiusFermion.h> | #include <Grid/qcd/action/fermion/ZMobiusFermion.h> | ||||||
| #include <Grid/qcd/action/fermion/SchurDiagTwoKappa.h> |  | ||||||
| #include <Grid/qcd/action/fermion/ScaledShamirFermion.h> | #include <Grid/qcd/action/fermion/ScaledShamirFermion.h> | ||||||
| #include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h> | #include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h> | ||||||
| #include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h> | #include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h> | ||||||
|   | |||||||
| @@ -160,6 +160,8 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const | |||||||
|     PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4); |     PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   std::cout << " Umu " << Umu._odata[0]<<std::endl; | ||||||
|  |   std::cout << " UUUmu " << UUUmu._odata[0]<<std::endl; | ||||||
|   pickCheckerboard(Even, UmuEven, Umu); |   pickCheckerboard(Even, UmuEven, Umu); | ||||||
|   pickCheckerboard(Odd,  UmuOdd , Umu); |   pickCheckerboard(Odd,  UmuOdd , Umu); | ||||||
|   pickCheckerboard(Even, UUUmuEven, UUUmu); |   pickCheckerboard(Even, UUUmuEven, UUUmu); | ||||||
|   | |||||||
| @@ -1,102 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: SchurDiagTwoKappa.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2017 |  | ||||||
|  |  | ||||||
| Author: Christoph Lehner |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #ifndef  _SCHUR_DIAG_TWO_KAPPA_H |  | ||||||
| #define  _SCHUR_DIAG_TWO_KAPPA_H |  | ||||||
|  |  | ||||||
| namespace Grid { |  | ||||||
|  |  | ||||||
|   // This is specific to (Z)mobius fermions |  | ||||||
|   template<class Matrix, class Field> |  | ||||||
|     class KappaSimilarityTransform { |  | ||||||
|   public: |  | ||||||
|     INHERIT_IMPL_TYPES(Matrix); |  | ||||||
|     std::vector<Coeff_t> kappa, kappaDag, kappaInv, kappaInvDag; |  | ||||||
|  |  | ||||||
|     KappaSimilarityTransform (Matrix &zmob) { |  | ||||||
|       for (int i=0;i<(int)zmob.bs.size();i++) { |  | ||||||
| 	Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) ); |  | ||||||
| 	kappa.push_back( k ); |  | ||||||
| 	kappaDag.push_back( conj(k) ); |  | ||||||
| 	kappaInv.push_back( 1.0 / k ); |  | ||||||
| 	kappaInvDag.push_back( 1.0 / conj(k) ); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   template<typename vobj> |  | ||||||
|     void sscale(const Lattice<vobj>& in, Lattice<vobj>& out, Coeff_t* s) { |  | ||||||
|     GridBase *grid=out._grid; |  | ||||||
|     out.checkerboard = in.checkerboard; |  | ||||||
|     assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now |  | ||||||
|     int Ls = grid->_rdimensions[0]; |  | ||||||
|     parallel_for(int ss=0;ss<grid->oSites();ss++){ |  | ||||||
|       vobj tmp = s[ss % Ls]*in._odata[ss]; |  | ||||||
|       vstream(out._odata[ss],tmp); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) { |  | ||||||
|     sscale(in,out,s); |  | ||||||
|     return norm2(out); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   virtual RealD M       (const Field& in, Field& out) { return sscale_norm(in,out,&kappa[0]);   } |  | ||||||
|   virtual RealD MDag    (const Field& in, Field& out) { return sscale_norm(in,out,&kappaDag[0]);} |  | ||||||
|   virtual RealD MInv    (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);} |  | ||||||
|   virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);} |  | ||||||
|  |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   template<class Matrix,class Field> |  | ||||||
|     class SchurDiagTwoKappaOperator :  public SchurOperatorBase<Field> { |  | ||||||
|   public: |  | ||||||
|     KappaSimilarityTransform<Matrix, Field> _S; |  | ||||||
|     SchurDiagTwoOperator<Matrix, Field> _Mat; |  | ||||||
|  |  | ||||||
|     SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {}; |  | ||||||
|  |  | ||||||
|     virtual  RealD Mpc      (const Field &in, Field &out) { |  | ||||||
|       Field tmp(in._grid); |  | ||||||
|  |  | ||||||
|       _S.MInv(in,out); |  | ||||||
|       _Mat.Mpc(out,tmp); |  | ||||||
|       return _S.M(tmp,out); |  | ||||||
|  |  | ||||||
|     } |  | ||||||
|     virtual  RealD MpcDag   (const Field &in, Field &out){ |  | ||||||
|       Field tmp(in._grid); |  | ||||||
|  |  | ||||||
|       _S.MDag(in,out); |  | ||||||
|       _Mat.MpcDag(out,tmp); |  | ||||||
|       return _S.MInvDag(tmp,out); |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #endif |  | ||||||
							
								
								
									
										0
									
								
								lib/qcd/spin/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/qcd/spin/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										0
									
								
								lib/qcd/utils/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/qcd/utils/.dirstamp
									
									
									
									
									
										Normal file
									
								
							| @@ -54,7 +54,7 @@ THE SOFTWARE. | |||||||
|  |  | ||||||
| #define GRID_MACRO_EMPTY() | #define GRID_MACRO_EMPTY() | ||||||
|  |  | ||||||
| #define GRID_MACRO_EVAL(...)     GRID_MACRO_EVAL64(__VA_ARGS__) | #define GRID_MACRO_EVAL(...)     GRID_MACRO_EVAL1024(__VA_ARGS__) | ||||||
| #define GRID_MACRO_EVAL1024(...) GRID_MACRO_EVAL512(GRID_MACRO_EVAL512(__VA_ARGS__)) | #define GRID_MACRO_EVAL1024(...) GRID_MACRO_EVAL512(GRID_MACRO_EVAL512(__VA_ARGS__)) | ||||||
| #define GRID_MACRO_EVAL512(...)  GRID_MACRO_EVAL256(GRID_MACRO_EVAL256(__VA_ARGS__)) | #define GRID_MACRO_EVAL512(...)  GRID_MACRO_EVAL256(GRID_MACRO_EVAL256(__VA_ARGS__)) | ||||||
| #define GRID_MACRO_EVAL256(...)  GRID_MACRO_EVAL128(GRID_MACRO_EVAL128(__VA_ARGS__)) | #define GRID_MACRO_EVAL256(...)  GRID_MACRO_EVAL128(GRID_MACRO_EVAL128(__VA_ARGS__)) | ||||||
|   | |||||||
| @@ -377,8 +377,8 @@ namespace Optimization { | |||||||
|       b0 = _mm256_extractf128_si256(b,0); |       b0 = _mm256_extractf128_si256(b,0); | ||||||
|       a1 = _mm256_extractf128_si256(a,1); |       a1 = _mm256_extractf128_si256(a,1); | ||||||
|       b1 = _mm256_extractf128_si256(b,1); |       b1 = _mm256_extractf128_si256(b,1); | ||||||
|       a0 = _mm_mullo_epi32(a0,b0); |       a0 = _mm_mul_epi32(a0,b0); | ||||||
|       a1 = _mm_mullo_epi32(a1,b1); |       a1 = _mm_mul_epi32(a1,b1); | ||||||
|       return _mm256_set_m128i(a1,a0); |       return _mm256_set_m128i(a1,a0); | ||||||
| #endif | #endif | ||||||
| #if defined (AVX2) | #if defined (AVX2) | ||||||
| @@ -470,52 +470,7 @@ namespace Optimization { | |||||||
|       return in; |       return in; | ||||||
|     }; |     }; | ||||||
|   }; |   }; | ||||||
| #define USE_FP16 |  | ||||||
|   struct PrecisionChange { |  | ||||||
|     static inline __m256i StoH (__m256 a,__m256 b) { |  | ||||||
|       __m256 h; |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|       __m128i ha = _mm256_cvtps_ph(a,0); |  | ||||||
|       __m128i hb = _mm256_cvtps_ph(b,0); |  | ||||||
|       h = _mm256_castps128_ps256(ha); |  | ||||||
|       h = _mm256_insertf128_ps(h,hb,1); |  | ||||||
| #else  |  | ||||||
|       assert(0); |  | ||||||
| #endif |  | ||||||
|       return h; |  | ||||||
|     } |  | ||||||
|     static inline void  HtoS (__m256i h,__m256 &sa,__m256 &sb) { |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|       sa = _mm256_cvtph_ps(_mm256_extractf128_ps(h,0)); |  | ||||||
|       sb = _mm256_cvtph_ps(_mm256_extractf128_ps(h,1)); |  | ||||||
| #else  |  | ||||||
|       assert(0); |  | ||||||
| #endif |  | ||||||
|     } |  | ||||||
|     static inline __m256 DtoS (__m256d a,__m256d b) { |  | ||||||
|       __m128 sa = _mm256_cvtpd_ps(a); |  | ||||||
|       __m128 sb = _mm256_cvtpd_ps(b); |  | ||||||
|       __m256 s = _mm256_castps128_ps256(sa); |  | ||||||
|       s = _mm256_insertf128_ps(s,sb,1); |  | ||||||
|       return s; |  | ||||||
|     } |  | ||||||
|     static inline void StoD (__m256 s,__m256d &a,__m256d &b) { |  | ||||||
|       a = _mm256_cvtps_pd(_mm256_extractf128_ps(s,0)); |  | ||||||
|       b = _mm256_cvtps_pd(_mm256_extractf128_ps(s,1)); |  | ||||||
|     } |  | ||||||
|     static inline __m256i DtoH (__m256d a,__m256d b,__m256d c,__m256d d) { |  | ||||||
|       __m256 sa,sb; |  | ||||||
|       sa = DtoS(a,b); |  | ||||||
|       sb = DtoS(c,d); |  | ||||||
|       return StoH(sa,sb); |  | ||||||
|     } |  | ||||||
|     static inline void HtoD (__m256i h,__m256d &a,__m256d &b,__m256d &c,__m256d &d) { |  | ||||||
|       __m256 sa,sb; |  | ||||||
|       HtoS(h,sa,sb); |  | ||||||
|       StoD(sa,a,b); |  | ||||||
|       StoD(sb,c,d); |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|   struct Exchange{ |   struct Exchange{ | ||||||
|     // 3210 ordering |     // 3210 ordering | ||||||
|     static inline void Exchange0(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){ |     static inline void Exchange0(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){ | ||||||
| @@ -720,7 +675,6 @@ namespace Optimization { | |||||||
| ////////////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Here assign types  | // Here assign types  | ||||||
|  |  | ||||||
|   typedef __m256i SIMD_Htype;  // Single precision type |  | ||||||
|   typedef __m256  SIMD_Ftype; // Single precision type |   typedef __m256  SIMD_Ftype; // Single precision type | ||||||
|   typedef __m256d SIMD_Dtype; // Double precision type |   typedef __m256d SIMD_Dtype; // Double precision type | ||||||
|   typedef __m256i SIMD_Itype; // Integer type |   typedef __m256i SIMD_Itype; // Integer type | ||||||
|   | |||||||
| @@ -235,9 +235,11 @@ namespace Optimization { | |||||||
|     inline void mac(__m512 &a, __m512 b, __m512 c){          |     inline void mac(__m512 &a, __m512 b, __m512 c){          | ||||||
|        a= _mm512_fmadd_ps( b, c, a);                          |        a= _mm512_fmadd_ps( b, c, a);                          | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     inline void mac(__m512d &a, __m512d b, __m512d c){ |     inline void mac(__m512d &a, __m512d b, __m512d c){ | ||||||
|       a= _mm512_fmadd_pd( b, c, a);                    |       a= _mm512_fmadd_pd( b, c, a);                    | ||||||
|     }                                              |     }                                              | ||||||
|  |  | ||||||
|     // Real float |     // Real float | ||||||
|     inline __m512 operator()(__m512 a, __m512 b){ |     inline __m512 operator()(__m512 a, __m512 b){ | ||||||
|       return _mm512_mul_ps(a,b); |       return _mm512_mul_ps(a,b); | ||||||
| @@ -340,52 +342,7 @@ namespace Optimization { | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
| #define USE_FP16 |  | ||||||
|   struct PrecisionChange { |  | ||||||
|     static inline __m512i StoH (__m512 a,__m512 b) { |  | ||||||
|       __m512i h; |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|       __m256i ha = _mm512_cvtps_ph(a,0); |  | ||||||
|       __m256i hb = _mm512_cvtps_ph(b,0); |  | ||||||
|       h =(__m512i) _mm512_castps256_ps512((__m256)ha); |  | ||||||
|       h =(__m512i) _mm512_insertf64x4((__m512d)h,(__m256d)hb,1); |  | ||||||
| #else |  | ||||||
|       assert(0); |  | ||||||
| #endif |  | ||||||
|       return h; |  | ||||||
|     } |  | ||||||
|     static inline void  HtoS (__m512i h,__m512 &sa,__m512 &sb) { |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|       sa = _mm512_cvtph_ps((__m256i)_mm512_extractf64x4_pd((__m512d)h,0)); |  | ||||||
|       sb = _mm512_cvtph_ps((__m256i)_mm512_extractf64x4_pd((__m512d)h,1)); |  | ||||||
| #else |  | ||||||
|       assert(0); |  | ||||||
| #endif |  | ||||||
|     } |  | ||||||
|     static inline __m512 DtoS (__m512d a,__m512d b) { |  | ||||||
|       __m256 sa = _mm512_cvtpd_ps(a); |  | ||||||
|       __m256 sb = _mm512_cvtpd_ps(b); |  | ||||||
|       __m512 s = _mm512_castps256_ps512(sa); |  | ||||||
|       s =(__m512) _mm512_insertf64x4((__m512d)s,(__m256d)sb,1); |  | ||||||
|       return s; |  | ||||||
|     } |  | ||||||
|     static inline void StoD (__m512 s,__m512d &a,__m512d &b) { |  | ||||||
|       a = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,0)); |  | ||||||
|       b = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,1)); |  | ||||||
|     } |  | ||||||
|     static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) { |  | ||||||
|       __m512 sa,sb; |  | ||||||
|       sa = DtoS(a,b); |  | ||||||
|       sb = DtoS(c,d); |  | ||||||
|       return StoH(sa,sb); |  | ||||||
|     } |  | ||||||
|     static inline void HtoD (__m512i h,__m512d &a,__m512d &b,__m512d &c,__m512d &d) { |  | ||||||
|       __m512 sa,sb; |  | ||||||
|       HtoS(h,sa,sb); |  | ||||||
|       StoD(sa,a,b); |  | ||||||
|       StoD(sb,c,d); |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|   // On extracting face: Ah Al , Bh Bl -> Ah Bh, Al Bl |   // On extracting face: Ah Al , Bh Bl -> Ah Bh, Al Bl | ||||||
|   // On merging buffers: Ah,Bh , Al Bl -> Ah Al, Bh, Bl |   // On merging buffers: Ah,Bh , Al Bl -> Ah Al, Bh, Bl | ||||||
|   // The operation is its own inverse |   // The operation is its own inverse | ||||||
| @@ -582,8 +539,6 @@ namespace Optimization { | |||||||
| ////////////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Here assign types  | // Here assign types  | ||||||
|  |  | ||||||
|  |  | ||||||
|   typedef __m512i SIMD_Htype;  // Single precision type |  | ||||||
|   typedef __m512 SIMD_Ftype;  // Single precision type |   typedef __m512 SIMD_Ftype;  // Single precision type | ||||||
|   typedef __m512d SIMD_Dtype; // Double precision type |   typedef __m512d SIMD_Dtype; // Double precision type | ||||||
|   typedef __m512i SIMD_Itype; // Integer type |   typedef __m512i SIMD_Itype; // Integer type | ||||||
|   | |||||||
| @@ -279,101 +279,6 @@ namespace Optimization { | |||||||
|    |    | ||||||
|   #undef timesi |   #undef timesi | ||||||
|  |  | ||||||
|   struct PrecisionChange { |  | ||||||
|     static inline vech StoH (const vecf &a,const vecf &b) { |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|       vech ret; |  | ||||||
|       vech *ha = (vech *)&a; |  | ||||||
|       vech *hb = (vech *)&b; |  | ||||||
|       const int nf = W<float>::r; |  | ||||||
|       //      VECTOR_FOR(i, nf,1){ ret.v[i]    = ( (uint16_t *) &a.v[i])[1] ; } |  | ||||||
|       //      VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; } |  | ||||||
|       VECTOR_FOR(i, nf,1){ ret.v[i]    = ha->v[2*i+1]; } |  | ||||||
|       VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; } |  | ||||||
| #else |  | ||||||
|       assert(0); |  | ||||||
| #endif |  | ||||||
|       return ret; |  | ||||||
|     } |  | ||||||
|     static inline void  HtoS (vech h,vecf &sa,vecf &sb) { |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|       const int nf = W<float>::r; |  | ||||||
|       const int nh = W<uint16_t>::r; |  | ||||||
|       vech *ha = (vech *)&sa; |  | ||||||
|       vech *hb = (vech *)&sb; |  | ||||||
|       VECTOR_FOR(i, nf, 1){ sb.v[i]= sa.v[i] = 0; } |  | ||||||
|       //      VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sa.v[i]))[1] = h.v[i];} |  | ||||||
|       //      VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sb.v[i]))[1] = h.v[i+nf];} |  | ||||||
|       VECTOR_FOR(i, nf, 1){ ha->v[2*i+1]=h.v[i]; } |  | ||||||
|       VECTOR_FOR(i, nf, 1){ hb->v[2*i+1]=h.v[i+nf]; } |  | ||||||
| #else |  | ||||||
|       assert(0); |  | ||||||
| #endif |  | ||||||
|     } |  | ||||||
|     static inline vecf DtoS (vecd a,vecd b) { |  | ||||||
|       const int nd = W<double>::r; |  | ||||||
|       const int nf = W<float>::r; |  | ||||||
|       vecf ret; |  | ||||||
|       VECTOR_FOR(i, nd,1){ ret.v[i]    = a.v[i] ; } |  | ||||||
|       VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; } |  | ||||||
|       return ret; |  | ||||||
|     } |  | ||||||
|     static inline void StoD (vecf s,vecd &a,vecd &b) { |  | ||||||
|       const int nd = W<double>::r; |  | ||||||
|       VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; } |  | ||||||
|       VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; } |  | ||||||
|     } |  | ||||||
|     static inline vech DtoH (vecd a,vecd b,vecd c,vecd d) { |  | ||||||
|       vecf sa,sb; |  | ||||||
|       sa = DtoS(a,b); |  | ||||||
|       sb = DtoS(c,d); |  | ||||||
|       return StoH(sa,sb); |  | ||||||
|     } |  | ||||||
|     static inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) { |  | ||||||
|       vecf sa,sb; |  | ||||||
|       HtoS(h,sa,sb); |  | ||||||
|       StoD(sa,a,b); |  | ||||||
|       StoD(sb,c,d); |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////// |  | ||||||
|   // Exchange support |  | ||||||
|   struct Exchange{ |  | ||||||
|  |  | ||||||
|     template <typename T,int n> |  | ||||||
|     static inline void ExchangeN(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){ |  | ||||||
|       const int w = W<T>::r; |  | ||||||
|       unsigned int mask = w >> (n + 1); |  | ||||||
|       //      std::cout << " Exchange "<<n<<" nsimd "<<w<<" mask 0x" <<std::hex<<mask<<std::dec<<std::endl; |  | ||||||
|       VECTOR_FOR(i, w, 1) {	 |  | ||||||
| 	int j1 = i&(~mask); |  | ||||||
| 	if  ( (i&mask) == 0 ) { out1.v[i]=in1.v[j1];} |  | ||||||
| 	else                  { out1.v[i]=in2.v[j1];} |  | ||||||
| 	int j2 = i|mask; |  | ||||||
| 	if  ( (i&mask) == 0 ) { out2.v[i]=in1.v[j2];} |  | ||||||
| 	else                  { out2.v[i]=in2.v[j2];} |  | ||||||
|       }       |  | ||||||
|     } |  | ||||||
|     template <typename T> |  | ||||||
|     static inline void Exchange0(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){ |  | ||||||
|       ExchangeN<T,0>(out1,out2,in1,in2); |  | ||||||
|     }; |  | ||||||
|     template <typename T> |  | ||||||
|     static inline void Exchange1(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){ |  | ||||||
|       ExchangeN<T,1>(out1,out2,in1,in2); |  | ||||||
|     }; |  | ||||||
|     template <typename T> |  | ||||||
|     static inline void Exchange2(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){ |  | ||||||
|       ExchangeN<T,2>(out1,out2,in1,in2); |  | ||||||
|     }; |  | ||||||
|     template <typename T> |  | ||||||
|     static inline void Exchange3(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){ |  | ||||||
|       ExchangeN<T,3>(out1,out2,in1,in2); |  | ||||||
|     }; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////// |   ////////////////////////////////////////////// | ||||||
|   // Some Template specialization |   // Some Template specialization | ||||||
|   #define perm(a, b, n, w)\ |   #define perm(a, b, n, w)\ | ||||||
| @@ -498,7 +403,6 @@ namespace Optimization { | |||||||
| ////////////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Here assign types  | // Here assign types  | ||||||
|  |  | ||||||
|   typedef Optimization::vech SIMD_Htype; // Reduced precision type |  | ||||||
|   typedef Optimization::vecf SIMD_Ftype; // Single precision type |   typedef Optimization::vecf SIMD_Ftype; // Single precision type | ||||||
|   typedef Optimization::vecd SIMD_Dtype; // Double precision type |   typedef Optimization::vecd SIMD_Dtype; // Double precision type | ||||||
|   typedef Optimization::veci SIMD_Itype; // Integer type |   typedef Optimization::veci SIMD_Itype; // Integer type | ||||||
|   | |||||||
| @@ -66,10 +66,6 @@ namespace Optimization { | |||||||
|   template <> struct W<Integer> { |   template <> struct W<Integer> { | ||||||
|     constexpr static unsigned int r = GEN_SIMD_WIDTH/4u; |     constexpr static unsigned int r = GEN_SIMD_WIDTH/4u; | ||||||
|   }; |   }; | ||||||
|   template <> struct W<uint16_t> { |  | ||||||
|     constexpr static unsigned int c = GEN_SIMD_WIDTH/4u; |  | ||||||
|     constexpr static unsigned int r = GEN_SIMD_WIDTH/2u; |  | ||||||
|   }; |  | ||||||
|    |    | ||||||
|   // SIMD vector types |   // SIMD vector types | ||||||
|   template <typename T> |   template <typename T> | ||||||
| @@ -79,7 +75,6 @@ namespace Optimization { | |||||||
|  |  | ||||||
|   typedef vec<float>   vecf; |   typedef vec<float>   vecf; | ||||||
|   typedef vec<double>  vecd; |   typedef vec<double>  vecd; | ||||||
|   typedef vec<uint16_t>  vech; // half precision comms |  | ||||||
|   typedef vec<Integer> veci; |   typedef vec<Integer> veci; | ||||||
|    |    | ||||||
| }} | }} | ||||||
|   | |||||||
| @@ -125,6 +125,7 @@ namespace Optimization { | |||||||
|       f[2] = a.v2; |       f[2] = a.v2; | ||||||
|       f[3] = a.v3; |       f[3] = a.v3; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     //Double |     //Double | ||||||
|     inline void operator()(double *d, vector4double a){ |     inline void operator()(double *d, vector4double a){ | ||||||
|       vec_st(a, 0, d); |       vec_st(a, 0, d); | ||||||
|   | |||||||
| @@ -38,7 +38,6 @@ Author: neo <cossu@post.kek.jp> | |||||||
|  |  | ||||||
| #include <pmmintrin.h> | #include <pmmintrin.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
| namespace Optimization { | namespace Optimization { | ||||||
|  |  | ||||||
| @@ -329,56 +328,6 @@ namespace Optimization { | |||||||
|     }; |     }; | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|    |  | ||||||
| #define _my_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16) |  | ||||||
| #define _my_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16) |  | ||||||
|  |  | ||||||
|   struct PrecisionChange { |  | ||||||
|     static inline __m128i StoH (__m128 a,__m128 b) { |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|       __m128i ha = _mm_cvtps_ph(a,0); |  | ||||||
|       __m128i hb = _mm_cvtps_ph(b,0); |  | ||||||
|       __m128i h =(__m128i) _mm_shuffle_ps((__m128)ha,(__m128)hb,_MM_SELECT_FOUR_FOUR(1,0,1,0)); |  | ||||||
| #else |  | ||||||
|       __m128i h = (__m128i)a; |  | ||||||
|       assert(0); |  | ||||||
| #endif |  | ||||||
|       return h; |  | ||||||
|     } |  | ||||||
|     static inline void  HtoS (__m128i h,__m128 &sa,__m128 &sb) { |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|       sa = _mm_cvtph_ps(h);  |  | ||||||
|       h =  (__m128i)_my_alignr_epi32((__m128i)h,(__m128i)h,2); |  | ||||||
|       sb = _mm_cvtph_ps(h); |  | ||||||
| #else |  | ||||||
|       assert(0); |  | ||||||
| #endif |  | ||||||
|     } |  | ||||||
|     static inline __m128 DtoS (__m128d a,__m128d b) { |  | ||||||
|       __m128 sa = _mm_cvtpd_ps(a); |  | ||||||
|       __m128 sb = _mm_cvtpd_ps(b); |  | ||||||
|       __m128 s = _mm_shuffle_ps(sa,sb,_MM_SELECT_FOUR_FOUR(1,0,1,0)); |  | ||||||
|       return s; |  | ||||||
|     } |  | ||||||
|     static inline void StoD (__m128 s,__m128d &a,__m128d &b) { |  | ||||||
|       a = _mm_cvtps_pd(s); |  | ||||||
|       s = (__m128)_my_alignr_epi32((__m128i)s,(__m128i)s,2); |  | ||||||
|       b = _mm_cvtps_pd(s); |  | ||||||
|     } |  | ||||||
|     static inline __m128i DtoH (__m128d a,__m128d b,__m128d c,__m128d d) { |  | ||||||
|       __m128 sa,sb; |  | ||||||
|       sa = DtoS(a,b); |  | ||||||
|       sb = DtoS(c,d); |  | ||||||
|       return StoH(sa,sb); |  | ||||||
|     } |  | ||||||
|     static inline void HtoD (__m128i h,__m128d &a,__m128d &b,__m128d &c,__m128d &d) { |  | ||||||
|       __m128 sa,sb; |  | ||||||
|       HtoS(h,sa,sb); |  | ||||||
|       StoD(sa,a,b); |  | ||||||
|       StoD(sb,c,d); |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   struct Exchange{ |   struct Exchange{ | ||||||
|     // 3210 ordering |     // 3210 ordering | ||||||
|     static inline void Exchange0(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ |     static inline void Exchange0(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ | ||||||
| @@ -386,10 +335,8 @@ namespace Optimization { | |||||||
|       out2= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2)); |       out2= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2)); | ||||||
|     }; |     }; | ||||||
|     static inline void Exchange1(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ |     static inline void Exchange1(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ | ||||||
|       out1= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); /*ACEG*/ |       out1= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); | ||||||
|       out2= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); /*BDFH*/ |       out2= _mm_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); | ||||||
|       out1= _mm_shuffle_ps(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ |  | ||||||
|       out2= _mm_shuffle_ps(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ |  | ||||||
|     }; |     }; | ||||||
|     static inline void Exchange2(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ |     static inline void Exchange2(__m128 &out1,__m128 &out2,__m128 in1,__m128 in2){ | ||||||
|       assert(0); |       assert(0); | ||||||
| @@ -437,8 +384,13 @@ namespace Optimization { | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
|    |    | ||||||
|     template<int n> static inline __m128  tRotate(__m128  in){ return (__m128)_my_alignr_epi32((__m128i)in,(__m128i)in,n); }; | #ifndef _mm_alignr_epi64 | ||||||
|     template<int n> static inline __m128d tRotate(__m128d in){ return (__m128d)_my_alignr_epi64((__m128i)in,(__m128i)in,n); }; | #define _mm_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16) | ||||||
|  | #define _mm_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16) | ||||||
|  | #endif  | ||||||
|  |  | ||||||
|  |     template<int n> static inline __m128  tRotate(__m128  in){ return (__m128)_mm_alignr_epi32((__m128i)in,(__m128i)in,n); }; | ||||||
|  |     template<int n> static inline __m128d tRotate(__m128d in){ return (__m128d)_mm_alignr_epi64((__m128i)in,(__m128i)in,n); }; | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|   ////////////////////////////////////////////// |   ////////////////////////////////////////////// | ||||||
| @@ -498,7 +450,6 @@ namespace Optimization { | |||||||
| ////////////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Here assign types  | // Here assign types  | ||||||
|  |  | ||||||
|   typedef __m128i SIMD_Htype;  // Single precision type |  | ||||||
|   typedef __m128 SIMD_Ftype;  // Single precision type |   typedef __m128 SIMD_Ftype;  // Single precision type | ||||||
|   typedef __m128d SIMD_Dtype; // Double precision type |   typedef __m128d SIMD_Dtype; // Double precision type | ||||||
|   typedef __m128i SIMD_Itype; // Integer type |   typedef __m128i SIMD_Itype; // Integer type | ||||||
|   | |||||||
| @@ -2,7 +2,7 @@ | |||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
| Source file: ./lib/simd/Grid_vector_type.h | Source file: ./lib/simd/Grid_vector_types.h | ||||||
|  |  | ||||||
| Copyright (C) 2015 | Copyright (C) 2015 | ||||||
|  |  | ||||||
| @@ -358,12 +358,16 @@ class Grid_simd { | |||||||
|   { |   { | ||||||
|     if       (n==3) { |     if       (n==3) { | ||||||
|       Optimization::Exchange::Exchange3(out1.v,out2.v,in1.v,in2.v); |       Optimization::Exchange::Exchange3(out1.v,out2.v,in1.v,in2.v); | ||||||
|  |       //      std::cout << " Exchange3 "<< out1<<" "<< out2<<" <- " << in1 << " "<<in2<<std::endl; | ||||||
|     } else if(n==2) { |     } else if(n==2) { | ||||||
|       Optimization::Exchange::Exchange2(out1.v,out2.v,in1.v,in2.v); |       Optimization::Exchange::Exchange2(out1.v,out2.v,in1.v,in2.v); | ||||||
|  |       //      std::cout << " Exchange2 "<< out1<<" "<< out2<<" <- " << in1 << " "<<in2<<std::endl; | ||||||
|     } else if(n==1) { |     } else if(n==1) { | ||||||
|       Optimization::Exchange::Exchange1(out1.v,out2.v,in1.v,in2.v); |       Optimization::Exchange::Exchange1(out1.v,out2.v,in1.v,in2.v); | ||||||
|  |       //      std::cout << " Exchange1 "<< out1<<" "<< out2<<" <- " << in1 << " "<<in2<<std::endl; | ||||||
|     } else if(n==0) {  |     } else if(n==0) {  | ||||||
|       Optimization::Exchange::Exchange0(out1.v,out2.v,in1.v,in2.v); |       Optimization::Exchange::Exchange0(out1.v,out2.v,in1.v,in2.v); | ||||||
|  |       //      std::cout << " Exchange0 "<< out1<<" "<< out2<<" <- " << in1 << " "<<in2<<std::endl; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -411,6 +415,7 @@ template <class S, class V, IfNotComplex<S> = 0> | |||||||
| inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) { | inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) { | ||||||
|   nrot = nrot % Grid_simd<S, V>::Nsimd(); |   nrot = nrot % Grid_simd<S, V>::Nsimd(); | ||||||
|   Grid_simd<S, V> ret; |   Grid_simd<S, V> ret; | ||||||
|  |   //    std::cout << "Rotate Real by "<<nrot<<std::endl; | ||||||
|   ret.v = Optimization::Rotate::rotate(b.v, nrot); |   ret.v = Optimization::Rotate::rotate(b.v, nrot); | ||||||
|   return ret; |   return ret; | ||||||
| } | } | ||||||
| @@ -418,6 +423,7 @@ template <class S, class V, IfComplex<S> = 0> | |||||||
| inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) { | inline Grid_simd<S, V> rotate(Grid_simd<S, V> b, int nrot) { | ||||||
|   nrot = nrot % Grid_simd<S, V>::Nsimd(); |   nrot = nrot % Grid_simd<S, V>::Nsimd(); | ||||||
|   Grid_simd<S, V> ret; |   Grid_simd<S, V> ret; | ||||||
|  |   //    std::cout << "Rotate Complex by "<<nrot<<std::endl; | ||||||
|   ret.v = Optimization::Rotate::rotate(b.v, 2 * nrot); |   ret.v = Optimization::Rotate::rotate(b.v, 2 * nrot); | ||||||
|   return ret; |   return ret; | ||||||
| } | } | ||||||
| @@ -425,12 +431,14 @@ template <class S, class V, IfNotComplex<S> =0> | |||||||
| inline void rotate( Grid_simd<S,V> &ret,Grid_simd<S,V> b,int nrot) | inline void rotate( Grid_simd<S,V> &ret,Grid_simd<S,V> b,int nrot) | ||||||
| { | { | ||||||
|   nrot = nrot % Grid_simd<S,V>::Nsimd(); |   nrot = nrot % Grid_simd<S,V>::Nsimd(); | ||||||
|  |   //    std::cout << "Rotate Real by "<<nrot<<std::endl; | ||||||
|   ret.v = Optimization::Rotate::rotate(b.v,nrot); |   ret.v = Optimization::Rotate::rotate(b.v,nrot); | ||||||
| } | } | ||||||
| template <class S, class V, IfComplex<S> =0>  | template <class S, class V, IfComplex<S> =0>  | ||||||
| inline void rotate(Grid_simd<S,V> &ret,Grid_simd<S,V> b,int nrot) | inline void rotate(Grid_simd<S,V> &ret,Grid_simd<S,V> b,int nrot) | ||||||
| { | { | ||||||
|   nrot = nrot % Grid_simd<S,V>::Nsimd(); |   nrot = nrot % Grid_simd<S,V>::Nsimd(); | ||||||
|  |   //    std::cout << "Rotate Complex by "<<nrot<<std::endl; | ||||||
|   ret.v = Optimization::Rotate::rotate(b.v,2*nrot); |   ret.v = Optimization::Rotate::rotate(b.v,2*nrot); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -690,6 +698,7 @@ inline Grid_simd<S, V> innerProduct(const Grid_simd<S, V> &l, | |||||||
|                                     const Grid_simd<S, V> &r) { |                                     const Grid_simd<S, V> &r) { | ||||||
|   return conjugate(l) * r; |   return conjugate(l) * r; | ||||||
| } | } | ||||||
|  |  | ||||||
| template <class S, class V> | template <class S, class V> | ||||||
| inline Grid_simd<S, V> outerProduct(const Grid_simd<S, V> &l, | inline Grid_simd<S, V> outerProduct(const Grid_simd<S, V> &l, | ||||||
|                                     const Grid_simd<S, V> &r) { |                                     const Grid_simd<S, V> &r) { | ||||||
| @@ -749,67 +758,6 @@ typedef Grid_simd<std::complex<float>, SIMD_Ftype> vComplexF; | |||||||
| typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD; | typedef Grid_simd<std::complex<double>, SIMD_Dtype> vComplexD; | ||||||
| typedef Grid_simd<Integer, SIMD_Itype> vInteger; | typedef Grid_simd<Integer, SIMD_Itype> vInteger; | ||||||
|  |  | ||||||
| // Half precision; no arithmetic support |  | ||||||
| typedef Grid_simd<uint16_t, SIMD_Htype>               vRealH; |  | ||||||
| typedef Grid_simd<std::complex<uint16_t>, SIMD_Htype> vComplexH; |  | ||||||
|  |  | ||||||
| inline void precisionChange(vRealF    *out,vRealD    *in,int nvec) |  | ||||||
| { |  | ||||||
|   assert((nvec&0x1)==0); |  | ||||||
|   for(int m=0;m*2<nvec;m++){ |  | ||||||
|     int n=m*2; |  | ||||||
|     out[m].v=Optimization::PrecisionChange::DtoS(in[n].v,in[n+1].v); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| inline void precisionChange(vRealH    *out,vRealD    *in,int nvec) |  | ||||||
| { |  | ||||||
|   assert((nvec&0x3)==0); |  | ||||||
|   for(int m=0;m*4<nvec;m++){ |  | ||||||
|     int n=m*4; |  | ||||||
|     out[m].v=Optimization::PrecisionChange::DtoH(in[n].v,in[n+1].v,in[n+2].v,in[n+3].v); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| inline void precisionChange(vRealH    *out,vRealF    *in,int nvec) |  | ||||||
| { |  | ||||||
|   assert((nvec&0x1)==0); |  | ||||||
|   for(int m=0;m*2<nvec;m++){ |  | ||||||
|     int n=m*2; |  | ||||||
|     out[m].v=Optimization::PrecisionChange::StoH(in[n].v,in[n+1].v); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| inline void precisionChange(vRealD    *out,vRealF    *in,int nvec) |  | ||||||
| { |  | ||||||
|   assert((nvec&0x1)==0); |  | ||||||
|   for(int m=0;m*2<nvec;m++){ |  | ||||||
|     int n=m*2; |  | ||||||
|     Optimization::PrecisionChange::StoD(in[m].v,out[n].v,out[n+1].v); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| inline void precisionChange(vRealD    *out,vRealH    *in,int nvec) |  | ||||||
| { |  | ||||||
|   assert((nvec&0x3)==0); |  | ||||||
|   for(int m=0;m*4<nvec;m++){ |  | ||||||
|     int n=m*4; |  | ||||||
|     Optimization::PrecisionChange::HtoD(in[m].v,out[n].v,out[n+1].v,out[n+2].v,out[n+3].v); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| inline void precisionChange(vRealF    *out,vRealH    *in,int nvec) |  | ||||||
| { |  | ||||||
|   assert((nvec&0x1)==0); |  | ||||||
|   for(int m=0;m*2<nvec;m++){ |  | ||||||
|     int n=m*2; |  | ||||||
|     Optimization::PrecisionChange::HtoS(in[m].v,out[n].v,out[n+1].v); |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| inline void precisionChange(vComplexF *out,vComplexD *in,int nvec){ precisionChange((vRealF *)out,(vRealD *)in,nvec);} |  | ||||||
| inline void precisionChange(vComplexH *out,vComplexD *in,int nvec){ precisionChange((vRealH *)out,(vRealD *)in,nvec);} |  | ||||||
| inline void precisionChange(vComplexH *out,vComplexF *in,int nvec){ precisionChange((vRealH *)out,(vRealF *)in,nvec);} |  | ||||||
| inline void precisionChange(vComplexD *out,vComplexF *in,int nvec){ precisionChange((vRealD *)out,(vRealF *)in,nvec);} |  | ||||||
| inline void precisionChange(vComplexD *out,vComplexH *in,int nvec){ precisionChange((vRealD *)out,(vRealH *)in,nvec);} |  | ||||||
| inline void precisionChange(vComplexF *out,vComplexH *in,int nvec){ precisionChange((vRealF *)out,(vRealH *)in,nvec);} |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| // Check our vector types are of an appropriate size. | // Check our vector types are of an appropriate size. | ||||||
| #if defined QPX | #if defined QPX | ||||||
| static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); | static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); | ||||||
|   | |||||||
							
								
								
									
										0
									
								
								lib/stencil/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/stencil/.dirstamp
									
									
									
									
									
										Normal file
									
								
							| @@ -56,11 +56,11 @@ class iScalar { | |||||||
|   typedef vtype element; |   typedef vtype element; | ||||||
|   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; |   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; | ||||||
|   typedef typename GridTypeMapper<vtype>::vector_type vector_type; |   typedef typename GridTypeMapper<vtype>::vector_type vector_type; | ||||||
|   typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD; |  | ||||||
|   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; |   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; | ||||||
|   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; |  | ||||||
|   typedef iScalar<tensor_reduced_v> tensor_reduced; |   typedef iScalar<tensor_reduced_v> tensor_reduced; | ||||||
|  |   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; | ||||||
|   typedef iScalar<recurse_scalar_object> scalar_object; |   typedef iScalar<recurse_scalar_object> scalar_object; | ||||||
|  |  | ||||||
|   // substitutes a real or complex version with same tensor structure |   // substitutes a real or complex version with same tensor structure | ||||||
|   typedef iScalar<typename GridTypeMapper<vtype>::Complexified> Complexified; |   typedef iScalar<typename GridTypeMapper<vtype>::Complexified> Complexified; | ||||||
|   typedef iScalar<typename GridTypeMapper<vtype>::Realified> Realified; |   typedef iScalar<typename GridTypeMapper<vtype>::Realified> Realified; | ||||||
| @@ -77,12 +77,8 @@ class iScalar { | |||||||
|   iScalar<vtype> & operator= (const iScalar<vtype> ©me) = default; |   iScalar<vtype> & operator= (const iScalar<vtype> ©me) = default; | ||||||
|   iScalar<vtype> & operator= (iScalar<vtype> &©me) = default; |   iScalar<vtype> & operator= (iScalar<vtype> &©me) = default; | ||||||
|   */ |   */ | ||||||
|  |   iScalar(scalar_type s) | ||||||
|   //  template<int N=0> |       : _internal(s){};  // recurse down and hit the constructor for vector_type | ||||||
|   //  iScalar(EnableIf<isSIMDvectorized<vector_type>, vector_type> s) : _internal(s){};  // recurse down and hit the constructor for vector_type |  | ||||||
|  |  | ||||||
|   iScalar(scalar_type s) : _internal(s){};  // recurse down and hit the constructor for vector_type |  | ||||||
|  |  | ||||||
|   iScalar(const Zero &z) { *this = zero; }; |   iScalar(const Zero &z) { *this = zero; }; | ||||||
|  |  | ||||||
|   iScalar<vtype> &operator=(const Zero &hero) { |   iScalar<vtype> &operator=(const Zero &hero) { | ||||||
| @@ -138,27 +134,32 @@ class iScalar { | |||||||
|   strong_inline const vtype &operator()(void) const { return _internal; } |   strong_inline const vtype &operator()(void) const { return _internal; } | ||||||
|  |  | ||||||
|   // Type casts meta programmed, must be pure scalar to match TensorRemove |   // Type casts meta programmed, must be pure scalar to match TensorRemove | ||||||
|   template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, IfNotSimd<U> = 0> |   template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, | ||||||
|  |             IfNotSimd<U> = 0> | ||||||
|   operator ComplexF() const { |   operator ComplexF() const { | ||||||
|     return (TensorRemove(_internal)); |     return (TensorRemove(_internal)); | ||||||
|   }; |   }; | ||||||
|   template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, IfNotSimd<U> = 0> |   template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, | ||||||
|  |             IfNotSimd<U> = 0> | ||||||
|   operator ComplexD() const { |   operator ComplexD() const { | ||||||
|     return (TensorRemove(_internal)); |     return (TensorRemove(_internal)); | ||||||
|   }; |   }; | ||||||
|   //  template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = |   //  template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = | ||||||
|   //  0> operator RealD    () const { return(real(TensorRemove(_internal))); } |   //  0> operator RealD    () const { return(real(TensorRemove(_internal))); } | ||||||
|   template <class U = vtype, class V = scalar_type, IfReal<V> = 0,IfNotSimd<U> = 0> |   template <class U = vtype, class V = scalar_type, IfReal<V> = 0, | ||||||
|  |             IfNotSimd<U> = 0> | ||||||
|   operator RealD() const { |   operator RealD() const { | ||||||
|     return TensorRemove(_internal); |     return TensorRemove(_internal); | ||||||
|   } |   } | ||||||
|   template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, IfNotSimd<U> = 0> |   template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, | ||||||
|  |             IfNotSimd<U> = 0> | ||||||
|   operator Integer() const { |   operator Integer() const { | ||||||
|     return Integer(TensorRemove(_internal)); |     return Integer(TensorRemove(_internal)); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // convert from a something to a scalar via constructor of something arg |   // convert from a something to a scalar via constructor of something arg | ||||||
|   template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type * = nullptr> |   template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type | ||||||
|  |                          * = nullptr> | ||||||
|   strong_inline iScalar<vtype> operator=(T arg) { |   strong_inline iScalar<vtype> operator=(T arg) { | ||||||
|     _internal = arg; |     _internal = arg; | ||||||
|     return *this; |     return *this; | ||||||
| @@ -192,7 +193,6 @@ class iVector { | |||||||
|   typedef vtype element; |   typedef vtype element; | ||||||
|   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; |   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; | ||||||
|   typedef typename GridTypeMapper<vtype>::vector_type vector_type; |   typedef typename GridTypeMapper<vtype>::vector_type vector_type; | ||||||
|   typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD; |  | ||||||
|   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; |   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; | ||||||
|   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; |   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; | ||||||
|   typedef iScalar<tensor_reduced_v> tensor_reduced; |   typedef iScalar<tensor_reduced_v> tensor_reduced; | ||||||
| @@ -305,7 +305,6 @@ class iMatrix { | |||||||
|   typedef vtype element; |   typedef vtype element; | ||||||
|   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; |   typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; | ||||||
|   typedef typename GridTypeMapper<vtype>::vector_type vector_type; |   typedef typename GridTypeMapper<vtype>::vector_type vector_type; | ||||||
|   typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD; |  | ||||||
|   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; |   typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; | ||||||
|   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; |   typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -35,71 +35,13 @@ namespace Grid { | |||||||
|     // innerProduct Matrix x Matrix -> Scalar |     // innerProduct Matrix x Matrix -> Scalar | ||||||
|     /////////////////////////////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////////////////////////////// | ||||||
|     template<class sobj> inline RealD norm2(const sobj &arg){ |     template<class sobj> inline RealD norm2(const sobj &arg){ | ||||||
|     auto nrm = innerProductD(arg,arg); |       typedef typename sobj::scalar_type scalar; | ||||||
|  |       decltype(innerProduct(arg,arg)) nrm; | ||||||
|  |       nrm = innerProduct(arg,arg); | ||||||
|       RealD ret = real(nrm); |       RealD ret = real(nrm); | ||||||
|       return ret; |       return ret; | ||||||
|     } |     } | ||||||
|   ////////////////////////////////////// |  | ||||||
|   // If single promote to double and sum 2x |  | ||||||
|   ////////////////////////////////////// |  | ||||||
|  |  | ||||||
| inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){  return innerProduct(l,r); } |  | ||||||
| inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){  return innerProduct(l,r); } |  | ||||||
| inline RealD    innerProductD(const RealD    &l,const RealD    &r){  return innerProduct(l,r); } |  | ||||||
| inline RealD    innerProductD(const RealF    &l,const RealF    &r){  return innerProduct(l,r); } |  | ||||||
|  |  | ||||||
| inline vComplexD innerProductD(const vComplexD &l,const vComplexD &r){  return innerProduct(l,r); } |  | ||||||
| inline vRealD    innerProductD(const vRealD    &l,const vRealD    &r){  return innerProduct(l,r); } |  | ||||||
| inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r){   |  | ||||||
|   vComplexD la,lb; |  | ||||||
|   vComplexD ra,rb; |  | ||||||
|   Optimization::PrecisionChange::StoD(l.v,la.v,lb.v); |  | ||||||
|   Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v); |  | ||||||
|   return innerProduct(la,ra) + innerProduct(lb,rb);  |  | ||||||
| } |  | ||||||
| inline vRealD innerProductD(const vRealF &l,const vRealF &r){   |  | ||||||
|   vRealD la,lb; |  | ||||||
|   vRealD ra,rb; |  | ||||||
|   Optimization::PrecisionChange::StoD(l.v,la.v,lb.v); |  | ||||||
|   Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v); |  | ||||||
|   return innerProduct(la,ra) + innerProduct(lb,rb);  |  | ||||||
| } |  | ||||||
|  |  | ||||||
|   template<class l,class r,int N> inline |  | ||||||
|   auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))> |  | ||||||
|   { |  | ||||||
|     typedef decltype(innerProductD(lhs._internal[0],rhs._internal[0])) ret_t; |  | ||||||
|     iScalar<ret_t> ret; |  | ||||||
|     ret=zero; |  | ||||||
|     for(int c1=0;c1<N;c1++){ |  | ||||||
|       ret._internal += innerProductD(lhs._internal[c1],rhs._internal[c1]); |  | ||||||
|     } |  | ||||||
|     return ret; |  | ||||||
|   } |  | ||||||
|   template<class l,class r,int N> inline |  | ||||||
|   auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0]))> |  | ||||||
|   { |  | ||||||
|     typedef decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0])) ret_t; |  | ||||||
|     iScalar<ret_t> ret; |  | ||||||
|     iScalar<ret_t> tmp; |  | ||||||
|     ret=zero; |  | ||||||
|     for(int c1=0;c1<N;c1++){ |  | ||||||
|     for(int c2=0;c2<N;c2++){ |  | ||||||
|       ret._internal+=innerProductD(lhs._internal[c1][c2],rhs._internal[c1][c2]); |  | ||||||
|     }} |  | ||||||
|     return ret; |  | ||||||
|   } |  | ||||||
|   template<class l,class r> inline |  | ||||||
|   auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD(lhs._internal,rhs._internal))> |  | ||||||
|   { |  | ||||||
|     typedef decltype(innerProductD(lhs._internal,rhs._internal)) ret_t; |  | ||||||
|     iScalar<ret_t> ret; |  | ||||||
|     ret._internal = innerProductD(lhs._internal,rhs._internal); |  | ||||||
|     return ret; |  | ||||||
|   } |  | ||||||
|   ////////////////////// |  | ||||||
|   // Keep same precison |  | ||||||
|   ////////////////////// |  | ||||||
|     template<class l,class r,int N> inline |     template<class l,class r,int N> inline | ||||||
|     auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))> |     auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))> | ||||||
|     { |     { | ||||||
|   | |||||||
| @@ -53,7 +53,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef typename T::scalar_type scalar_type; |     typedef typename T::scalar_type scalar_type; | ||||||
|     typedef typename T::vector_type vector_type; |     typedef typename T::vector_type vector_type; | ||||||
|     typedef typename T::vector_typeD vector_typeD; |  | ||||||
|     typedef typename T::tensor_reduced tensor_reduced; |     typedef typename T::tensor_reduced tensor_reduced; | ||||||
|     typedef typename T::scalar_object scalar_object; |     typedef typename T::scalar_object scalar_object; | ||||||
|     typedef typename T::Complexified Complexified; |     typedef typename T::Complexified Complexified; | ||||||
| @@ -68,7 +67,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef RealF scalar_type; |     typedef RealF scalar_type; | ||||||
|     typedef RealF vector_type; |     typedef RealF vector_type; | ||||||
|     typedef RealD vector_typeD; |  | ||||||
|     typedef RealF tensor_reduced ; |     typedef RealF tensor_reduced ; | ||||||
|     typedef RealF scalar_object; |     typedef RealF scalar_object; | ||||||
|     typedef ComplexF Complexified; |     typedef ComplexF Complexified; | ||||||
| @@ -79,7 +77,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef RealD scalar_type; |     typedef RealD scalar_type; | ||||||
|     typedef RealD vector_type; |     typedef RealD vector_type; | ||||||
|     typedef RealD vector_typeD; |  | ||||||
|     typedef RealD tensor_reduced; |     typedef RealD tensor_reduced; | ||||||
|     typedef RealD scalar_object; |     typedef RealD scalar_object; | ||||||
|     typedef ComplexD Complexified; |     typedef ComplexD Complexified; | ||||||
| @@ -90,7 +87,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef ComplexF scalar_type; |     typedef ComplexF scalar_type; | ||||||
|     typedef ComplexF vector_type; |     typedef ComplexF vector_type; | ||||||
|     typedef ComplexD vector_typeD; |  | ||||||
|     typedef ComplexF tensor_reduced; |     typedef ComplexF tensor_reduced; | ||||||
|     typedef ComplexF scalar_object; |     typedef ComplexF scalar_object; | ||||||
|     typedef ComplexF Complexified; |     typedef ComplexF Complexified; | ||||||
| @@ -101,7 +97,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef ComplexD scalar_type; |     typedef ComplexD scalar_type; | ||||||
|     typedef ComplexD vector_type; |     typedef ComplexD vector_type; | ||||||
|     typedef ComplexD vector_typeD; |  | ||||||
|     typedef ComplexD tensor_reduced; |     typedef ComplexD tensor_reduced; | ||||||
|     typedef ComplexD scalar_object; |     typedef ComplexD scalar_object; | ||||||
|     typedef ComplexD Complexified; |     typedef ComplexD Complexified; | ||||||
| @@ -112,7 +107,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef Integer scalar_type; |     typedef Integer scalar_type; | ||||||
|     typedef Integer vector_type; |     typedef Integer vector_type; | ||||||
|     typedef Integer vector_typeD; |  | ||||||
|     typedef Integer tensor_reduced; |     typedef Integer tensor_reduced; | ||||||
|     typedef Integer scalar_object; |     typedef Integer scalar_object; | ||||||
|     typedef void Complexified; |     typedef void Complexified; | ||||||
| @@ -124,7 +118,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef RealF  scalar_type; |     typedef RealF  scalar_type; | ||||||
|     typedef vRealF vector_type; |     typedef vRealF vector_type; | ||||||
|     typedef vRealD vector_typeD; |  | ||||||
|     typedef vRealF tensor_reduced; |     typedef vRealF tensor_reduced; | ||||||
|     typedef RealF  scalar_object; |     typedef RealF  scalar_object; | ||||||
|     typedef vComplexF Complexified; |     typedef vComplexF Complexified; | ||||||
| @@ -135,7 +128,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef RealD  scalar_type; |     typedef RealD  scalar_type; | ||||||
|     typedef vRealD vector_type; |     typedef vRealD vector_type; | ||||||
|     typedef vRealD vector_typeD; |  | ||||||
|     typedef vRealD tensor_reduced; |     typedef vRealD tensor_reduced; | ||||||
|     typedef RealD  scalar_object; |     typedef RealD  scalar_object; | ||||||
|     typedef vComplexD Complexified; |     typedef vComplexD Complexified; | ||||||
| @@ -146,7 +138,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef ComplexF  scalar_type; |     typedef ComplexF  scalar_type; | ||||||
|     typedef vComplexF vector_type; |     typedef vComplexF vector_type; | ||||||
|     typedef vComplexD vector_typeD; |  | ||||||
|     typedef vComplexF tensor_reduced; |     typedef vComplexF tensor_reduced; | ||||||
|     typedef ComplexF  scalar_object; |     typedef ComplexF  scalar_object; | ||||||
|     typedef vComplexF Complexified; |     typedef vComplexF Complexified; | ||||||
| @@ -157,7 +148,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef ComplexD  scalar_type; |     typedef ComplexD  scalar_type; | ||||||
|     typedef vComplexD vector_type; |     typedef vComplexD vector_type; | ||||||
|     typedef vComplexD vector_typeD; |  | ||||||
|     typedef vComplexD tensor_reduced; |     typedef vComplexD tensor_reduced; | ||||||
|     typedef ComplexD  scalar_object; |     typedef ComplexD  scalar_object; | ||||||
|     typedef vComplexD Complexified; |     typedef vComplexD Complexified; | ||||||
| @@ -168,7 +158,6 @@ namespace Grid { | |||||||
|   public: |   public: | ||||||
|     typedef  Integer scalar_type; |     typedef  Integer scalar_type; | ||||||
|     typedef vInteger vector_type; |     typedef vInteger vector_type; | ||||||
|     typedef vInteger vector_typeD; |  | ||||||
|     typedef vInteger tensor_reduced; |     typedef vInteger tensor_reduced; | ||||||
|     typedef  Integer scalar_object; |     typedef  Integer scalar_object; | ||||||
|     typedef void Complexified; |     typedef void Complexified; | ||||||
| @@ -252,8 +241,7 @@ namespace Grid { | |||||||
|   template<typename T> |   template<typename T> | ||||||
|   class isSIMDvectorized{ |   class isSIMDvectorized{ | ||||||
|     template<typename U> |     template<typename U> | ||||||
|     static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,    |     static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,   typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *); | ||||||
|       typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *); |  | ||||||
|  |  | ||||||
|     template<typename U> |     template<typename U> | ||||||
|     static double test(...); |     static double test(...); | ||||||
|   | |||||||
| @@ -311,8 +311,8 @@ void Grid_init(int *argc,char ***argv) | |||||||
|     std::cout<<GridLogMessage<<std::endl; |     std::cout<<GridLogMessage<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"Performance:"<<std::endl; |     std::cout<<GridLogMessage<<"Performance:"<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<std::endl; |     std::cout<<GridLogMessage<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"  --comms-concurrent : Asynchronous MPI calls; several dirs at a time "<<std::endl;     |     std::cout<<GridLogMessage<<"  --comms-isend   : Asynchronous MPI calls; several dirs at a time "<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --comms-sequential : Synchronous MPI calls; one dirs at a time "<<std::endl;     |     std::cout<<GridLogMessage<<"  --comms-sendrecv: Synchronous MPI calls; one dirs at a time "<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<"  --comms-overlap : Overlap comms with compute "<<std::endl;     |     std::cout<<GridLogMessage<<"  --comms-overlap : Overlap comms with compute "<<std::endl;     | ||||||
|     std::cout<<GridLogMessage<<std::endl; |     std::cout<<GridLogMessage<<std::endl; | ||||||
|     std::cout<<GridLogMessage<<"  --dslash-generic: Wilson kernel for generic Nc"<<std::endl;     |     std::cout<<GridLogMessage<<"  --dslash-generic: Wilson kernel for generic Nc"<<std::endl;     | ||||||
| @@ -457,6 +457,5 @@ void Grid_debug_handler_init(void) | |||||||
|  |  | ||||||
|   sigaction(SIGFPE,&sa,NULL); |   sigaction(SIGFPE,&sa,NULL); | ||||||
|   sigaction(SIGKILL,&sa,NULL); |   sigaction(SIGKILL,&sa,NULL); | ||||||
|   sigaction(SIGILL,&sa,NULL); |  | ||||||
| } | } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -308,23 +308,18 @@ public: | |||||||
|   int n; |   int n; | ||||||
|   funcExchange(int _n) { n=_n;}; |   funcExchange(int _n) { n=_n;}; | ||||||
|   template<class vec>    void operator()(vec &r1,vec &r2,vec &i1,vec &i2) const { exchange(r1,r2,i1,i2,n);} |   template<class vec>    void operator()(vec &r1,vec &r2,vec &i1,vec &i2) const { exchange(r1,r2,i1,i2,n);} | ||||||
|   template<class scal>   void apply(std::vector<scal> &r1, |   template<class scal>   void apply(std::vector<scal> &r1,std::vector<scal> &r2,std::vector<scal> &in1,std::vector<scal> &in2)  const {  | ||||||
| 				    std::vector<scal> &r2, |  | ||||||
| 				    std::vector<scal> &in1, |  | ||||||
| 				    std::vector<scal> &in2)  const  |  | ||||||
|   {  |  | ||||||
|     int sz=in1.size(); |     int sz=in1.size(); | ||||||
|  |  | ||||||
|  |      | ||||||
|     int msk = sz>>(n+1); |     int msk = sz>>(n+1); | ||||||
|  |  | ||||||
|     for(int i=0;i<sz;i++) { |     int j1=0; | ||||||
|       int j1 = i&(~msk); |     int j2=0; | ||||||
|       int j2 = i|msk; |     for(int i=0;i<sz;i++) if ( (i&msk) == 0 ) r1[j1++] = in1[ i ]; | ||||||
|       if  ( (i&msk) == 0 ) { r1[i]=in1[j1];} |     for(int i=0;i<sz;i++) if ( (i&msk) == 0 ) r1[j1++] = in2[ i ]; | ||||||
|       else                 { r1[i]=in2[j1];} |     for(int i=0;i<sz;i++) if ( (i&msk)  ) r2[j2++] = in1[ i ]; | ||||||
|  |     for(int i=0;i<sz;i++) if ( (i&msk)  ) r2[j2++] = in2[ i ]; | ||||||
|       if  ( (i&msk) == 0 ) { r2[i]=in1[j2];} |  | ||||||
|       else                 { r2[i]=in2[j2];} |  | ||||||
|     }       |  | ||||||
|   } |   } | ||||||
|   std::string name(void) const { return std::string("Exchange"); } |   std::string name(void) const { return std::string("Exchange"); } | ||||||
| }; | }; | ||||||
| @@ -459,8 +454,8 @@ void ExchangeTester(const functor &func) | |||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << " " << func.name() << " " <<func.n <<std::endl; |   std::cout<<GridLogMessage << " " << func.name() << " " <<func.n <<std::endl; | ||||||
|  |  | ||||||
|   //for(int i=0;i<Nsimd;i++) std::cout << " i "<<i<<" ref "<<reference1[i]<<" res "<<result1[i]<<std::endl; |   //  for(int i=0;i<Nsimd;i++) std::cout << " i "<<i<<" "<<reference1[i]<<" "<<result1[i]<<std::endl; | ||||||
|   //for(int i=0;i<Nsimd;i++) std::cout << " i "<<i<<" ref "<<reference2[i]<<" res "<<result2[i]<<std::endl; |   //  for(int i=0;i<Nsimd;i++) std::cout << " i "<<i<<" "<<reference2[i]<<" "<<result2[i]<<std::endl; | ||||||
|  |  | ||||||
|   for(int i=0;i<Nsimd;i++){ |   for(int i=0;i<Nsimd;i++){ | ||||||
|     int found=0; |     int found=0; | ||||||
| @@ -470,7 +465,7 @@ void ExchangeTester(const functor &func) | |||||||
| 	//	std::cout << " i "<<i<<" j "<<j<<" "<<reference1[j]<<" "<<result1[i]<<std::endl; | 	//	std::cout << " i "<<i<<" j "<<j<<" "<<reference1[j]<<" "<<result1[i]<<std::endl; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     //    assert(found==1); |     assert(found==1); | ||||||
|   } |   } | ||||||
|   for(int i=0;i<Nsimd;i++){ |   for(int i=0;i<Nsimd;i++){ | ||||||
|     int found=0; |     int found=0; | ||||||
| @@ -480,24 +475,15 @@ void ExchangeTester(const functor &func) | |||||||
| 	//	std::cout << " i "<<i<<" j "<<j<<" "<<reference2[j]<<" "<<result2[i]<<std::endl; | 	//	std::cout << " i "<<i<<" j "<<j<<" "<<reference2[j]<<" "<<result2[i]<<std::endl; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     //    assert(found==1); |     assert(found==1); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /* |  | ||||||
|   for(int i=0;i<Nsimd;i++){ |  | ||||||
|     std::cout << " i "<< i |  | ||||||
| 	      <<" result1  "<<result1[i] |  | ||||||
| 	      <<" result2  "<<result2[i] |  | ||||||
| 	      <<" test1  "<<test1[i] |  | ||||||
| 	      <<" test2  "<<test2[i] |  | ||||||
| 	      <<" input1 "<<input1[i] |  | ||||||
| 	      <<" input2 "<<input2[i]<<std::endl; |  | ||||||
|   } |  | ||||||
|   */ |  | ||||||
|   for(int i=0;i<Nsimd;i++){ |   for(int i=0;i<Nsimd;i++){ | ||||||
|     assert(test1[i]==input1[i]); |     assert(test1[i]==input1[i]); | ||||||
|     assert(test2[i]==input2[i]); |     assert(test2[i]==input2[i]); | ||||||
|   } |   }//    std::cout << " i "<< i<<" test1"<<test1[i]<<" "<<input1[i]<<std::endl; | ||||||
|  |     //    std::cout << " i "<< i<<" test2"<<test2[i]<<" "<<input2[i]<<std::endl; | ||||||
|  |   //  } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -692,69 +678,5 @@ int main (int argc, char ** argv) | |||||||
|   IntTester(funcMinus()); |   IntTester(funcMinus()); | ||||||
|   IntTester(funcTimes()); |   IntTester(funcTimes()); | ||||||
|  |  | ||||||
|   std::cout<<GridLogMessage << "==================================="<<  std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "Testing precisionChange            "<<  std::endl; |  | ||||||
|   std::cout<<GridLogMessage << "==================================="<<  std::endl; |  | ||||||
|   { |  | ||||||
|     GridSerialRNG          sRNG; |  | ||||||
|     sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); |  | ||||||
|     const int Ndp = 16; |  | ||||||
|     const int Nsp = Ndp/2; |  | ||||||
|     const int Nhp = Ndp/4; |  | ||||||
|     std::vector<vRealH,alignedAllocator<vRealH> > H (Nhp); |  | ||||||
|     std::vector<vRealF,alignedAllocator<vRealF> > F (Nsp); |  | ||||||
|     std::vector<vRealF,alignedAllocator<vRealF> > FF(Nsp); |  | ||||||
|     std::vector<vRealD,alignedAllocator<vRealD> > D (Ndp); |  | ||||||
|     std::vector<vRealD,alignedAllocator<vRealD> > DD(Ndp); |  | ||||||
|     for(int i=0;i<16;i++){ |  | ||||||
|       random(sRNG,D[i]); |  | ||||||
|     } |  | ||||||
|     // Double to Single |  | ||||||
|     precisionChange(&F[0],&D[0],Ndp); |  | ||||||
|     precisionChange(&DD[0],&F[0],Ndp); |  | ||||||
|     std::cout << GridLogMessage<<"Double to single"; |  | ||||||
|     for(int i=0;i<Ndp;i++){ |  | ||||||
|       //      std::cout << "DD["<<i<<"] = "<< DD[i]<<" "<<D[i]<<" "<<DD[i]-D[i] <<std::endl;  |  | ||||||
|       DD[i] = DD[i] - D[i]; |  | ||||||
|       decltype(innerProduct(DD[0],DD[0])) nrm; |  | ||||||
|       nrm = innerProduct(DD[i],DD[i]); |  | ||||||
|       auto tmp = Reduce(nrm); |  | ||||||
|       //      std::cout << tmp << std::endl; |  | ||||||
|       assert( tmp < 1.0e-14 );  |  | ||||||
|     } |  | ||||||
|     std::cout <<" OK ! "<<std::endl; |  | ||||||
|  |  | ||||||
|     // Double to Half |  | ||||||
| #ifdef USE_FP16 |  | ||||||
|     std::cout << GridLogMessage<< "Double to half" ; |  | ||||||
|     precisionChange(&H[0],&D[0],Ndp); |  | ||||||
|     precisionChange(&DD[0],&H[0],Ndp); |  | ||||||
|     for(int i=0;i<Ndp;i++){ |  | ||||||
|       //      std::cout << "DD["<<i<<"] = "<< DD[i]<<" "<<D[i]<<" "<<DD[i]-D[i]<<std::endl;  |  | ||||||
|       DD[i] = DD[i] - D[i]; |  | ||||||
|       decltype(innerProduct(DD[0],DD[0])) nrm; |  | ||||||
|       nrm = innerProduct(DD[i],DD[i]); |  | ||||||
|       auto tmp = Reduce(nrm); |  | ||||||
|       //      std::cout << tmp << std::endl; |  | ||||||
|       assert( tmp < 1.0e-3 );  |  | ||||||
|     } |  | ||||||
|     std::cout <<" OK ! "<<std::endl; |  | ||||||
|  |  | ||||||
|     std::cout << GridLogMessage<< "Single to half"; |  | ||||||
|     // Single to Half |  | ||||||
|     precisionChange(&H[0] ,&F[0],Nsp); |  | ||||||
|     precisionChange(&FF[0],&H[0],Nsp); |  | ||||||
|     for(int i=0;i<Nsp;i++){ |  | ||||||
|       //      std::cout << "FF["<<i<<"] = "<< FF[i]<<" "<<F[i]<<" "<<FF[i]-F[i]<<std::endl;  |  | ||||||
|       FF[i] = FF[i] - F[i]; |  | ||||||
|       decltype(innerProduct(FF[0],FF[0])) nrm; |  | ||||||
|       nrm = innerProduct(FF[i],FF[i]); |  | ||||||
|       auto tmp = Reduce(nrm); |  | ||||||
|       //      std::cout << tmp << std::endl; |  | ||||||
|       assert( tmp < 1.0e-3 );  |  | ||||||
|     } |  | ||||||
|     std::cout <<" OK ! "<<std::endl; |  | ||||||
| #endif |  | ||||||
|   } |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -148,13 +148,11 @@ class FourierAcceleratedGaugeFixer  : public Gimpl { | |||||||
|     Complex psqMax(16.0); |     Complex psqMax(16.0); | ||||||
|     Fp =  psqMax*one/psq; |     Fp =  psqMax*one/psq; | ||||||
|  |  | ||||||
|     /* |  | ||||||
|     static int once; |     static int once; | ||||||
|     if ( once == 0 ) {  |     if ( once == 0 ) {  | ||||||
|       std::cout << " Fp " << Fp <<std::endl; |       std::cout << " Fp " << Fp <<std::endl; | ||||||
|       once ++; |       once ++; | ||||||
|       }*/ |     } | ||||||
|  |  | ||||||
|     pokeSite(TComplex(1.0),Fp,coor); |     pokeSite(TComplex(1.0),Fp,coor); | ||||||
|  |  | ||||||
|     dmuAmu_p  = dmuAmu_p * Fp;  |     dmuAmu_p  = dmuAmu_p * Fp;  | ||||||
|   | |||||||
| @@ -2,10 +2,11 @@ | |||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|     Source file: ./tests/Test_wilson_tm_even_odd.cc |     Source file: ./tests/Test_wilson_even_odd.cc | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
| @@ -88,8 +89,8 @@ int main (int argc, char ** argv) | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   RealD mass=0.1; |   RealD mass=0.1; | ||||||
|  |   RealD mu  = 0.1; | ||||||
|   WilsonFermionR Dw(Umu,Grid,RBGrid,mass); |   WilsonTMFermionR Dw(Umu,Grid,RBGrid,mass,mu); | ||||||
|  |  | ||||||
|   LatticeFermion src_e   (&RBGrid); |   LatticeFermion src_e   (&RBGrid); | ||||||
|   LatticeFermion src_o   (&RBGrid); |   LatticeFermion src_o   (&RBGrid); | ||||||
| @@ -206,7 +207,7 @@ int main (int argc, char ** argv) | |||||||
|   pickCheckerboard(Odd ,phi_o,phi); |   pickCheckerboard(Odd ,phi_o,phi); | ||||||
|   RealD t1,t2; |   RealD t1,t2; | ||||||
|  |  | ||||||
|   SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw); |   SchurDiagMooeeOperator<WilsonTMFermionR,LatticeFermion> HermOpEO(Dw); | ||||||
|   HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); |   HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); | ||||||
|   HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); |   HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -2,11 +2,10 @@ | |||||||
| 
 | 
 | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
| 
 | 
 | ||||||
|     Source file: ./tests/Test_wilson_even_odd.cc |     Source file: ./tests/Test_wilson_tm_even_odd.cc | ||||||
| 
 | 
 | ||||||
|     Copyright (C) 2015 |     Copyright (C) 2015 | ||||||
| 
 | 
 | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
| 
 | 
 | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
| @@ -89,8 +88,8 @@ int main (int argc, char ** argv) | |||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   RealD mass=0.1; |   RealD mass=0.1; | ||||||
|   RealD mu  = 0.1; | 
 | ||||||
|   WilsonTMFermionR Dw(Umu,Grid,RBGrid,mass,mu); |   WilsonFermionR Dw(Umu,Grid,RBGrid,mass); | ||||||
| 
 | 
 | ||||||
|   LatticeFermion src_e   (&RBGrid); |   LatticeFermion src_e   (&RBGrid); | ||||||
|   LatticeFermion src_o   (&RBGrid); |   LatticeFermion src_o   (&RBGrid); | ||||||
| @@ -207,7 +206,7 @@ int main (int argc, char ** argv) | |||||||
|   pickCheckerboard(Odd ,phi_o,phi); |   pickCheckerboard(Odd ,phi_o,phi); | ||||||
|   RealD t1,t2; |   RealD t1,t2; | ||||||
| 
 | 
 | ||||||
|   SchurDiagMooeeOperator<WilsonTMFermionR,LatticeFermion> HermOpEO(Dw); |   SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw); | ||||||
|   HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); |   HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); | ||||||
|   HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); |   HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); | ||||||
| 
 | 
 | ||||||
| @@ -115,8 +115,8 @@ int main (int argc, char ** argv) | |||||||
|   RNG.SeedFixedIntegers(seeds); |   RNG.SeedFixedIntegers(seeds); | ||||||
|  |  | ||||||
|  |  | ||||||
|   RealD alpha = 1.2; |   RealD alpha = 1.0; | ||||||
|   RealD beta  = 0.1; |   RealD beta  = 0.03; | ||||||
|   RealD mu    = 0.0; |   RealD mu    = 0.0; | ||||||
|   int order = 11; |   int order = 11; | ||||||
|   ChebyshevLanczos<LatticeComplex> Cheby(alpha,beta,mu,order); |   ChebyshevLanczos<LatticeComplex> Cheby(alpha,beta,mu,order); | ||||||
| @@ -131,9 +131,10 @@ int main (int argc, char ** argv) | |||||||
|   const int Nit= 10000; |   const int Nit= 10000; | ||||||
|  |  | ||||||
|   int Nconv; |   int Nconv; | ||||||
|   RealD eresid = 1.0e-6; |   RealD eresid = 1.0e-8; | ||||||
|  |  | ||||||
|   ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nm,eresid,Nit); |   ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nm,eresid,Nit); | ||||||
|  |  | ||||||
|   ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit); |   ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit); | ||||||
|  |  | ||||||
|   LatticeComplex src(grid); gaussian(RNG,src); |   LatticeComplex src(grid); gaussian(RNG,src); | ||||||
| @@ -144,9 +145,9 @@ int main (int argc, char ** argv) | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   { |   { | ||||||
|     std::vector<RealD>          eval(Nm); |     //    std::vector<RealD>          eval(Nm); | ||||||
|     std::vector<LatticeComplex> evec(Nm,grid); |     //    std::vector<LatticeComplex> evec(Nm,grid); | ||||||
|     ChebyIRL.calc(eval,evec,src, Nconv); |     //    ChebyIRL.calc(eval,evec,src, Nconv); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
|   | |||||||
| @@ -89,7 +89,7 @@ int main(int argc, char** argv) { | |||||||
|   GridStopWatch CGTimer; |   GridStopWatch CGTimer; | ||||||
|  |  | ||||||
|   SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> HermOpEO(Ddwf); |   SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> HermOpEO(Ddwf); | ||||||
|   ConjugateGradient<LatticeFermion> CG(1.0e-5, 10000, 0);// switch off the assert |   ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0);// switch off the assert | ||||||
|  |  | ||||||
|   CGTimer.Start(); |   CGTimer.Start(); | ||||||
|   CG(HermOpEO, src_o, result_o); |   CG(HermOpEO, src_o, result_o); | ||||||
|   | |||||||
| @@ -73,7 +73,7 @@ int main (int argc, char ** argv) | |||||||
|   DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); |   DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |  | ||||||
|   MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf); |   MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf); | ||||||
|   ConjugateGradient<LatticeFermion> CG(1.0e-6,10000); |   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); | ||||||
|   CG(HermOp,src,result); |   CG(HermOp,src,result); | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
|   | |||||||
| @@ -1,119 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./tests/Test_wilson_cg_unprec.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Grid::QCD; |  | ||||||
|  |  | ||||||
| template<class d> |  | ||||||
| struct scal { |  | ||||||
|   d internal; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|   Gamma::Algebra Gmu [] = { |  | ||||||
|     Gamma::Algebra::GammaX, |  | ||||||
|     Gamma::Algebra::GammaY, |  | ||||||
|     Gamma::Algebra::GammaZ, |  | ||||||
|     Gamma::Algebra::GammaT |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   typedef typename ImprovedStaggeredFermion5DR::FermionField FermionField;  |  | ||||||
|   typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;  |  | ||||||
|   typename ImprovedStaggeredFermion5DR::ImplParams params;  |  | ||||||
|  |  | ||||||
|   const int Ls=4; |  | ||||||
|  |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   std::vector<int> latt_size   = GridDefaultLatt(); |  | ||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |  | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|  |  | ||||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); |  | ||||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); |  | ||||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); |  | ||||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); |  | ||||||
|  |  | ||||||
|   std::vector<int> seeds({1,2,3,4}); |  | ||||||
|   GridParallelRNG pRNG(UGrid );  pRNG.SeedFixedIntegers(seeds); |  | ||||||
|   GridParallelRNG pRNG5(FGrid);  pRNG5.SeedFixedIntegers(seeds); |  | ||||||
|  |  | ||||||
|   FermionField src(FGrid); random(pRNG5,src); |  | ||||||
|   FermionField result(FGrid); result=zero; |  | ||||||
|   RealD nrm = norm2(src); |  | ||||||
|  |  | ||||||
|   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); |  | ||||||
|  |  | ||||||
|   RealD mass=0.01; |  | ||||||
|   ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass); |  | ||||||
|   MdagMLinearOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds); |  | ||||||
|  |  | ||||||
|   ConjugateGradient<FermionField> CG(1.0e-8,10000); |  | ||||||
|   BlockConjugateGradient<FermionField> BCG(1.0e-8,10000); |  | ||||||
|   MultiRHSConjugateGradient<FermionField> mCG(1.0e-8,10000); |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|   std::cout << GridLogMessage << " Calling 4d CG "<<std::endl; |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|   ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass); |  | ||||||
|   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp4d(Ds4d); |  | ||||||
|   FermionField src4d(UGrid); random(pRNG,src4d); |  | ||||||
|   FermionField result4d(UGrid); result4d=zero; |  | ||||||
|   CG(HermOp4d,src4d,result4d); |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|   std::cout << GridLogMessage << " Calling 5d CG for "<<Ls <<" right hand sides" <<std::endl; |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|   result=zero; |  | ||||||
|   CG(HermOp,src,result); |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|   std::cout << GridLogMessage << " Calling multiRHS CG for "<<Ls <<" right hand sides" <<std::endl; |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|   result=zero; |  | ||||||
|   mCG(HermOp,src,result); |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|  |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|   std::cout << GridLogMessage << " Calling Block CG for "<<Ls <<" right hand sides" <<std::endl; |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|   result=zero; |  | ||||||
|   BCG(HermOp,src,result); |  | ||||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |  | ||||||
| } |  | ||||||
| @@ -1,82 +0,0 @@ | |||||||
|     /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./tests/Test_wilson_cg_unprec.cc |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
|     *************************************************************************************/ |  | ||||||
|     /*  END LEGAL */ |  | ||||||
| #include <Grid/Grid.h> |  | ||||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> |  | ||||||
|  |  | ||||||
| using namespace std; |  | ||||||
| using namespace Grid; |  | ||||||
| using namespace Grid::QCD; |  | ||||||
|  |  | ||||||
| template<class d> |  | ||||||
| struct scal { |  | ||||||
|   d internal; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|   Gamma::Algebra Gmu [] = { |  | ||||||
|     Gamma::Algebra::GammaX, |  | ||||||
|     Gamma::Algebra::GammaY, |  | ||||||
|     Gamma::Algebra::GammaZ, |  | ||||||
|     Gamma::Algebra::GammaT |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) |  | ||||||
| { |  | ||||||
|   typedef typename ImprovedStaggeredFermionR::FermionField FermionField;  |  | ||||||
|   typedef typename ImprovedStaggeredFermionR::ComplexField ComplexField;  |  | ||||||
|   typename ImprovedStaggeredFermionR::ImplParams params;  |  | ||||||
|  |  | ||||||
|   Grid_init(&argc,&argv); |  | ||||||
|  |  | ||||||
|   std::vector<int> latt_size   = GridDefaultLatt(); |  | ||||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); |  | ||||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); |  | ||||||
|   GridCartesian               Grid(latt_size,simd_layout,mpi_layout); |  | ||||||
|   GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout); |  | ||||||
|  |  | ||||||
|   std::vector<int> seeds({1,2,3,4}); |  | ||||||
|   GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds); |  | ||||||
|  |  | ||||||
|   FermionField src(&Grid); random(pRNG,src); |  | ||||||
|   RealD nrm = norm2(src); |  | ||||||
|   FermionField result(&Grid); result=zero; |  | ||||||
|   LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); |  | ||||||
|  |  | ||||||
|   double volume=1; |  | ||||||
|   for(int mu=0;mu<Nd;mu++){ |  | ||||||
|     volume=volume*latt_size[mu]; |  | ||||||
|   }   |  | ||||||
|    |  | ||||||
|   RealD mass=0.1; |  | ||||||
|   ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass); |  | ||||||
|  |  | ||||||
|   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp(Ds); |  | ||||||
|   CG(HermOp,src,result); |  | ||||||
|  |  | ||||||
|   Grid_finalize(); |  | ||||||
| } |  | ||||||
		Reference in New Issue
	
	Block a user