mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Compare commits
	
		
			213 Commits
		
	
	
		
			ckelly-dec
			...
			chulwoo-de
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					c5ab9f247f | ||
| 
						 | 
					2893a9b116 | ||
| 
						 | 
					5c90c3b457 | ||
| 91e04056f9 | |||
| 3789e3f31c | |||
| 0c66719210 | |||
| 
						 | 
					3a5b5c8bec | ||
| 
						 | 
					fdbe071213 | ||
| 4bc21ec7cb | |||
| e3083b6dfc | |||
| 
						 | 
					ab89418658 | ||
| 
						 | 
					28cd99882c | ||
| 
						 | 
					aceaee774c | ||
| 
						 | 
					f8f9fd6f22 | ||
| 101aa769eb | |||
| 0bf99bfde5 | |||
| 64bf6fe54e | |||
| 1161d566b9 | |||
| c698b16d75 | |||
| c4c89336fe | |||
| fa59789580 | |||
| 92c2c7d3b5 | |||
| e99ce0875f | |||
| cc1d9eb05b | |||
| 57c027fea2 | |||
| 207dc439a7 | |||
| 77ef0bba48 | |||
| 
						 | 
					999b3a2e26 | ||
| 7ee577eee6 | |||
| d27ceb75dd | |||
| 65c2b794b5 | |||
| de82b08f70 | |||
| 1d03f515b9 | |||
| 1c4c287925 | |||
| 10bbfdc3b2 | |||
| e15f0b47c1 | |||
| 0fd0661be3 | |||
| 6628806142 | |||
| 17198a4abd | |||
| 
						 | 
					465e6f01b7 | ||
| 
						 | 
					0eec752216 | ||
| 
						 | 
					122195384e | ||
| 
						 | 
					2ae1c14c03 | ||
| 
						 | 
					0ddb7e707b | ||
| 
						 | 
					e2d8f67f63 | ||
| 
						 | 
					0d99f62027 | ||
| 
						 | 
					c23375cd65 | ||
| 
						 | 
					a762a0d9ff | ||
| 
						 | 
					f7ca6ca889 | ||
| 
						 | 
					ec4a9b7f6c | ||
| 
						 | 
					5341977948 | ||
| 
						 | 
					f0aed4672e | ||
| 344d251fc4 | |||
| f6c53e5039 | |||
| ba09cbae3e | |||
| 6aa000176f | |||
| 23b6172c31 | |||
| ca5eebe10c | |||
| 3f128443ab | |||
| 
						 | 
					1e554350ac | ||
| 
						 | 
					c79ea0dcef | ||
| 
						 | 
					e3f141f82f | ||
| 
						 | 
					a6dfa2386b | ||
| 
						 | 
					d9b5e66877 | ||
| 
						 | 
					8fd8bc25e9 | ||
| 
						 | 
					ba427abde9 | ||
| 
						 | 
					9b6ab6db16 | ||
| 
						 | 
					806a83d38b | ||
| 
						 | 
					7223753355 | ||
| 
						 | 
					b27bac4669 | ||
| 
						 | 
					c8a93d6a93 | ||
| 
						 | 
					04072a5e1f | ||
| 
						 | 
					574ea4f843 | ||
| 
						 | 
					f2ae9682ff | ||
| 
						 | 
					587f80cd93 | ||
| 
						 | 
					528eb773ad | ||
| 
						 | 
					e5657510b0 | ||
| 
						 | 
					f473919526 | ||
| 
						 | 
					8f1b0afc2a | ||
| 
						 | 
					1494b0f397 | ||
| 
						 | 
					ab56ccdd25 | ||
| cf2f69812b | |||
| 
						 | 
					c323425496 | ||
| 
						 | 
					a646260e82 | ||
| 
						 | 
					af9c8d1372 | ||
| 
						 | 
					650e02b344 | ||
| 
						 | 
					a524ca2a4b | ||
| 
						 | 
					23a7176b71 | ||
| 
						 | 
					b1192a8908 | ||
| 
						 | 
					e8dddb1596 | ||
| 
						 | 
					c7ba47bdc7 | ||
| 
						 | 
					e67fc2be18 | ||
| 
						 | 
					f473ef7591 | ||
| 
						 | 
					f7b1060aed | ||
| 
						 | 
					8052556275 | ||
| 
						 | 
					60d965f79e | ||
| 
						 | 
					83b15bfcdd | ||
| 
						 | 
					1ecbf9794d | ||
| 
						 | 
					2ded354403 | ||
| 
						 | 
					340428a1fe | ||
| 
						 | 
					c77b7ee897 | ||
| 
						 | 
					b6c3bc574b | ||
| 
						 | 
					1e355a51e1 | ||
| 
						 | 
					ad80f61fba | ||
| 
						 | 
					61469252fe | ||
| 
						 | 
					02198ac5b5 | ||
| 
						 | 
					21abaf7e91 | ||
| 
						 | 
					165bffc2e7 | ||
| 
						 | 
					644fd6d32e | ||
| 
						 | 
					f54e0ec9bd | ||
| 
						 | 
					a155a362da | ||
| 
						 | 
					60d4564151 | ||
| 
						 | 
					d4e57f4bc6 | ||
| 
						 | 
					3920b2c0ab | ||
| 
						 | 
					2733c4b93c | ||
| 
						 | 
					e17c773a0b | ||
| 
						 | 
					36a800f26c | ||
| 
						 | 
					b75da563d9 | ||
| 
						 | 
					f9faec38be | ||
| 
						 | 
					d6b64f47d9 | ||
| 
						 | 
					a359f7a9f5 | ||
| 
						 | 
					b606deb3f0 | ||
| 
						 | 
					090e7aa930 | ||
| 
						 | 
					2dce9c3cff | ||
| 
						 | 
					1e72bd8b8c | ||
| 
						 | 
					dc72293398 | ||
| 
						 | 
					e55c35734b | ||
| 
						 | 
					325e745daa | ||
| 
						 | 
					61413565d0 | ||
| 
						 | 
					ff129d9ad9 | ||
| 
						 | 
					03fcd3b33a | ||
| 
						 | 
					68b02da483 | ||
| 
						 | 
					e051119769 | ||
| 
						 | 
					f3661aac4f | ||
| 1eb169ac0b | |||
| 5674c3e241 | |||
| 62c4ba0d1e | |||
| 
						 | 
					497e7e4c53 | ||
| 19526d09c2 | |||
| 
						 | 
					6aeaf6f568 | ||
| 
						 | 
					40f2db9bc0 | ||
| 
						 | 
					2cfa20cc4e | ||
| 
						 | 
					a5f683d124 | ||
| 
						 | 
					02a57ffa6f | ||
| 
						 | 
					9f0d9ade68 | ||
| 
						 | 
					3425751cb8 | ||
| 
						 | 
					db5e8050a8 | ||
| 
						 | 
					a3fbabf404 | ||
| 
						 | 
					22422a84d9 | ||
| 
						 | 
					b6f6da923e | ||
| 
						 | 
					c9fadf97a5 | ||
| 
						 | 
					c650bb3f3d | ||
| 
						 | 
					81395e85d1 | ||
| 
						 | 
					340a29b735 | ||
| 
						 | 
					f7be108e35 | ||
| 
						 | 
					a0fc47c6f9 | ||
| 
						 | 
					42a9ac71d2 | ||
| 
						 | 
					41c2b09184 | ||
| 
						 | 
					294dbf1bf0 | ||
| 
						 | 
					9548c8b91f | ||
| 
						 | 
					7f927a541c | ||
| 
						 | 
					e2f73e3ead | ||
| 
						 | 
					6371676a75 | ||
| 
						 | 
					bd84c23298 | ||
| 
						 | 
					7aa8d5e8af | ||
| 
						 | 
					6012b0ec23 | ||
| 
						 | 
					411ac49dd7 | ||
| 
						 | 
					b8fb05a422 | ||
| 
						 | 
					5c57d4f403 | ||
| 
						 | 
					fc6ad65751 | ||
| 
						 | 
					dafc74020c | ||
| 
						 | 
					d19321dfde | ||
| 
						 | 
					5924e5a562 | ||
| 
						 | 
					c99d748da6 | ||
| 
						 | 
					02452afd36 | ||
| 
						 | 
					331768dcff | ||
| 
						 | 
					4aac345bea | ||
| 
						 | 
					15c0022042 | ||
| 
						 | 
					aae8bf31a7 | ||
| 
						 | 
					1e68b1c1bd | ||
| 
						 | 
					491a708225 | ||
| 
						 | 
					5a80930dd2 | ||
| 
						 | 
					145a295231 | ||
| 
						 | 
					841a37f941 | ||
| 
						 | 
					e6cad3821c | ||
| 
						 | 
					98de1cbb6a | ||
| 
						 | 
					f7d61b8b81 | ||
| 
						 | 
					78c4e862ef | ||
| 1e0be161e5 | |||
| 
						 | 
					0afcf1cf13 | ||
| 
						 | 
					08edbb5cbe | ||
| 
						 | 
					0abfbcc8eb | ||
| 
						 | 
					1b94253ba4 | ||
| 
						 | 
					36e6f9ac7b | ||
| 
						 | 
					2f41691c11 | ||
| 
						 | 
					09bfe52840 | ||
| 
						 | 
					8c9010d0f4 | ||
| 
						 | 
					42c583265c | ||
| 
						 | 
					539d698492 | ||
| 
						 | 
					31ca609d12 | ||
| 
						 | 
					5710966324 | ||
| 
						 | 
					e108e708a3 | ||
| 
						 | 
					6f0198d4d9 | ||
| 
						 | 
					67ccb043f1 | ||
| 
						 | 
					24a5a81c53 | ||
| 
						 | 
					eb1759d7ea | ||
| 
						 | 
					34a0fde2ad | ||
| 
						 | 
					bc34b7e808 | ||
| 
						 | 
					284453c5e9 | ||
| 
						 | 
					77054bd61c | ||
| 
						 | 
					f2b4edc090 | ||
| 
						 | 
					fb81acca3c | ||
| 
						 | 
					ee9ecb6115 | 
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -94,7 +94,7 @@ Thumbs.db
 | 
			
		||||
 | 
			
		||||
# build directory #
 | 
			
		||||
###################
 | 
			
		||||
build/*
 | 
			
		||||
build*/*
 | 
			
		||||
 | 
			
		||||
# IDE related files #
 | 
			
		||||
#####################
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										79
									
								
								.travis.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								.travis.yml
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,79 @@
 | 
			
		||||
language: cpp
 | 
			
		||||
 | 
			
		||||
matrix:
 | 
			
		||||
  include:
 | 
			
		||||
    - os:        osx
 | 
			
		||||
      osx_image: xcode7.2
 | 
			
		||||
      compiler: clang
 | 
			
		||||
    - os:        osx
 | 
			
		||||
      osx_image: xcode7.2
 | 
			
		||||
      compiler: gcc
 | 
			
		||||
      env: VERSION=-5
 | 
			
		||||
    - compiler: gcc
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-4.9
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-4.9
 | 
			
		||||
    - compiler: gcc
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
          packages:
 | 
			
		||||
            - g++-5
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-5
 | 
			
		||||
    - compiler: clang
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
            - llvm-toolchain-precise-3.7
 | 
			
		||||
          packages:
 | 
			
		||||
            - clang-3.7
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-3.7
 | 
			
		||||
    - compiler: clang
 | 
			
		||||
      addons:
 | 
			
		||||
        apt:
 | 
			
		||||
          sources:
 | 
			
		||||
            - ubuntu-toolchain-r-test
 | 
			
		||||
            - llvm-toolchain-precise-3.8
 | 
			
		||||
          packages:
 | 
			
		||||
            - clang-3.8
 | 
			
		||||
            - libmpfr-dev
 | 
			
		||||
            - libgmp-dev
 | 
			
		||||
            - libmpc-dev
 | 
			
		||||
            - binutils-dev
 | 
			
		||||
      env: VERSION=-3.8
 | 
			
		||||
      
 | 
			
		||||
before_install:
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
 | 
			
		||||
    
 | 
			
		||||
install:
 | 
			
		||||
    - export CC=$CC$VERSION
 | 
			
		||||
    - export CXX=$CXX$VERSION
 | 
			
		||||
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
 | 
			
		||||
    
 | 
			
		||||
script:
 | 
			
		||||
    - ./scripts/reconfigure_script
 | 
			
		||||
    - mkdir build
 | 
			
		||||
    - cd build
 | 
			
		||||
    - ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=single --enable-simd=SSE4 --enable-comms=none
 | 
			
		||||
    - make -j4
 | 
			
		||||
    - ./benchmarks/Benchmark_dwf --threads 1
 | 
			
		||||
							
								
								
									
										9
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								AUTHORS
									
									
									
									
									
								
							@@ -1,5 +1,4 @@
 | 
			
		||||
Peter Boyle
 | 
			
		||||
Azusa Yamaguchi
 | 
			
		||||
Intel Parallel Computing Centre @ Higgs Centre for Theoretical Physics
 | 
			
		||||
University of Edinburgh
 | 
			
		||||
Scotland, UK
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										876
									
								
								COPYING
									
									
									
									
									
								
							
							
						
						
									
										876
									
								
								COPYING
									
									
									
									
									
								
							@@ -1,622 +1,281 @@
 | 
			
		||||
                    GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                       Version 3, 29 June 2007
 | 
			
		||||
                   GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                       Version 2, June 1991
 | 
			
		||||
 | 
			
		||||
 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 | 
			
		||||
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 | 
			
		||||
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 Everyone is permitted to copy and distribute verbatim copies
 | 
			
		||||
 of this license document, but changing it is not allowed.
 | 
			
		||||
 | 
			
		||||
                            Preamble
 | 
			
		||||
 | 
			
		||||
  The GNU General Public License is a free, copyleft license for
 | 
			
		||||
software and other kinds of works.
 | 
			
		||||
 | 
			
		||||
  The licenses for most software and other practical works are designed
 | 
			
		||||
to take away your freedom to share and change the works.  By contrast,
 | 
			
		||||
the GNU General Public License is intended to guarantee your freedom to
 | 
			
		||||
share and change all versions of a program--to make sure it remains free
 | 
			
		||||
software for all its users.  We, the Free Software Foundation, use the
 | 
			
		||||
GNU General Public License for most of our software; it applies also to
 | 
			
		||||
any other work released this way by its authors.  You can apply it to
 | 
			
		||||
  The licenses for most software are designed to take away your
 | 
			
		||||
freedom to share and change it.  By contrast, the GNU General Public
 | 
			
		||||
License is intended to guarantee your freedom to share and change free
 | 
			
		||||
software--to make sure the software is free for all its users.  This
 | 
			
		||||
General Public License applies to most of the Free Software
 | 
			
		||||
Foundation's software and to any other program whose authors commit to
 | 
			
		||||
using it.  (Some other Free Software Foundation software is covered by
 | 
			
		||||
the GNU Lesser General Public License instead.)  You can apply it to
 | 
			
		||||
your programs, too.
 | 
			
		||||
 | 
			
		||||
  When we speak of free software, we are referring to freedom, not
 | 
			
		||||
price.  Our General Public Licenses are designed to make sure that you
 | 
			
		||||
have the freedom to distribute copies of free software (and charge for
 | 
			
		||||
them if you wish), that you receive source code or can get it if you
 | 
			
		||||
want it, that you can change the software or use pieces of it in new
 | 
			
		||||
free programs, and that you know you can do these things.
 | 
			
		||||
this service if you wish), that you receive source code or can get it
 | 
			
		||||
if you want it, that you can change the software or use pieces of it
 | 
			
		||||
in new free programs; and that you know you can do these things.
 | 
			
		||||
 | 
			
		||||
  To protect your rights, we need to prevent others from denying you
 | 
			
		||||
these rights or asking you to surrender the rights.  Therefore, you have
 | 
			
		||||
certain responsibilities if you distribute copies of the software, or if
 | 
			
		||||
you modify it: responsibilities to respect the freedom of others.
 | 
			
		||||
  To protect your rights, we need to make restrictions that forbid
 | 
			
		||||
anyone to deny you these rights or to ask you to surrender the rights.
 | 
			
		||||
These restrictions translate to certain responsibilities for you if you
 | 
			
		||||
distribute copies of the software, or if you modify it.
 | 
			
		||||
 | 
			
		||||
  For example, if you distribute copies of such a program, whether
 | 
			
		||||
gratis or for a fee, you must pass on to the recipients the same
 | 
			
		||||
freedoms that you received.  You must make sure that they, too, receive
 | 
			
		||||
or can get the source code.  And you must show them these terms so they
 | 
			
		||||
know their rights.
 | 
			
		||||
gratis or for a fee, you must give the recipients all the rights that
 | 
			
		||||
you have.  You must make sure that they, too, receive or can get the
 | 
			
		||||
source code.  And you must show them these terms so they know their
 | 
			
		||||
rights.
 | 
			
		||||
 | 
			
		||||
  Developers that use the GNU GPL protect your rights with two steps:
 | 
			
		||||
(1) assert copyright on the software, and (2) offer you this License
 | 
			
		||||
giving you legal permission to copy, distribute and/or modify it.
 | 
			
		||||
  We protect your rights with two steps: (1) copyright the software, and
 | 
			
		||||
(2) offer you this license which gives you legal permission to copy,
 | 
			
		||||
distribute and/or modify the software.
 | 
			
		||||
 | 
			
		||||
  For the developers' and authors' protection, the GPL clearly explains
 | 
			
		||||
that there is no warranty for this free software.  For both users' and
 | 
			
		||||
authors' sake, the GPL requires that modified versions be marked as
 | 
			
		||||
changed, so that their problems will not be attributed erroneously to
 | 
			
		||||
authors of previous versions.
 | 
			
		||||
  Also, for each author's protection and ours, we want to make certain
 | 
			
		||||
that everyone understands that there is no warranty for this free
 | 
			
		||||
software.  If the software is modified by someone else and passed on, we
 | 
			
		||||
want its recipients to know that what they have is not the original, so
 | 
			
		||||
that any problems introduced by others will not reflect on the original
 | 
			
		||||
authors' reputations.
 | 
			
		||||
 | 
			
		||||
  Some devices are designed to deny users access to install or run
 | 
			
		||||
modified versions of the software inside them, although the manufacturer
 | 
			
		||||
can do so.  This is fundamentally incompatible with the aim of
 | 
			
		||||
protecting users' freedom to change the software.  The systematic
 | 
			
		||||
pattern of such abuse occurs in the area of products for individuals to
 | 
			
		||||
use, which is precisely where it is most unacceptable.  Therefore, we
 | 
			
		||||
have designed this version of the GPL to prohibit the practice for those
 | 
			
		||||
products.  If such problems arise substantially in other domains, we
 | 
			
		||||
stand ready to extend this provision to those domains in future versions
 | 
			
		||||
of the GPL, as needed to protect the freedom of users.
 | 
			
		||||
 | 
			
		||||
  Finally, every program is threatened constantly by software patents.
 | 
			
		||||
States should not allow patents to restrict development and use of
 | 
			
		||||
software on general-purpose computers, but in those that do, we wish to
 | 
			
		||||
avoid the special danger that patents applied to a free program could
 | 
			
		||||
make it effectively proprietary.  To prevent this, the GPL assures that
 | 
			
		||||
patents cannot be used to render the program non-free.
 | 
			
		||||
  Finally, any free program is threatened constantly by software
 | 
			
		||||
patents.  We wish to avoid the danger that redistributors of a free
 | 
			
		||||
program will individually obtain patent licenses, in effect making the
 | 
			
		||||
program proprietary.  To prevent this, we have made it clear that any
 | 
			
		||||
patent must be licensed for everyone's free use or not licensed at all.
 | 
			
		||||
 | 
			
		||||
  The precise terms and conditions for copying, distribution and
 | 
			
		||||
modification follow.
 | 
			
		||||
 | 
			
		||||
                       TERMS AND CONDITIONS
 | 
			
		||||
 | 
			
		||||
  0. Definitions.
 | 
			
		||||
 | 
			
		||||
  "This License" refers to version 3 of the GNU General Public License.
 | 
			
		||||
 | 
			
		||||
  "Copyright" also means copyright-like laws that apply to other kinds of
 | 
			
		||||
works, such as semiconductor masks.
 | 
			
		||||
 | 
			
		||||
  "The Program" refers to any copyrightable work licensed under this
 | 
			
		||||
License.  Each licensee is addressed as "you".  "Licensees" and
 | 
			
		||||
"recipients" may be individuals or organizations.
 | 
			
		||||
 | 
			
		||||
  To "modify" a work means to copy from or adapt all or part of the work
 | 
			
		||||
in a fashion requiring copyright permission, other than the making of an
 | 
			
		||||
exact copy.  The resulting work is called a "modified version" of the
 | 
			
		||||
earlier work or a work "based on" the earlier work.
 | 
			
		||||
 | 
			
		||||
  A "covered work" means either the unmodified Program or a work based
 | 
			
		||||
on the Program.
 | 
			
		||||
 | 
			
		||||
  To "propagate" a work means to do anything with it that, without
 | 
			
		||||
permission, would make you directly or secondarily liable for
 | 
			
		||||
infringement under applicable copyright law, except executing it on a
 | 
			
		||||
computer or modifying a private copy.  Propagation includes copying,
 | 
			
		||||
distribution (with or without modification), making available to the
 | 
			
		||||
public, and in some countries other activities as well.
 | 
			
		||||
 | 
			
		||||
  To "convey" a work means any kind of propagation that enables other
 | 
			
		||||
parties to make or receive copies.  Mere interaction with a user through
 | 
			
		||||
a computer network, with no transfer of a copy, is not conveying.
 | 
			
		||||
 | 
			
		||||
  An interactive user interface displays "Appropriate Legal Notices"
 | 
			
		||||
to the extent that it includes a convenient and prominently visible
 | 
			
		||||
feature that (1) displays an appropriate copyright notice, and (2)
 | 
			
		||||
tells the user that there is no warranty for the work (except to the
 | 
			
		||||
extent that warranties are provided), that licensees may convey the
 | 
			
		||||
work under this License, and how to view a copy of this License.  If
 | 
			
		||||
the interface presents a list of user commands or options, such as a
 | 
			
		||||
menu, a prominent item in the list meets this criterion.
 | 
			
		||||
 | 
			
		||||
  1. Source Code.
 | 
			
		||||
 | 
			
		||||
  The "source code" for a work means the preferred form of the work
 | 
			
		||||
for making modifications to it.  "Object code" means any non-source
 | 
			
		||||
form of a work.
 | 
			
		||||
 | 
			
		||||
  A "Standard Interface" means an interface that either is an official
 | 
			
		||||
standard defined by a recognized standards body, or, in the case of
 | 
			
		||||
interfaces specified for a particular programming language, one that
 | 
			
		||||
is widely used among developers working in that language.
 | 
			
		||||
 | 
			
		||||
  The "System Libraries" of an executable work include anything, other
 | 
			
		||||
than the work as a whole, that (a) is included in the normal form of
 | 
			
		||||
packaging a Major Component, but which is not part of that Major
 | 
			
		||||
Component, and (b) serves only to enable use of the work with that
 | 
			
		||||
Major Component, or to implement a Standard Interface for which an
 | 
			
		||||
implementation is available to the public in source code form.  A
 | 
			
		||||
"Major Component", in this context, means a major essential component
 | 
			
		||||
(kernel, window system, and so on) of the specific operating system
 | 
			
		||||
(if any) on which the executable work runs, or a compiler used to
 | 
			
		||||
produce the work, or an object code interpreter used to run it.
 | 
			
		||||
 | 
			
		||||
  The "Corresponding Source" for a work in object code form means all
 | 
			
		||||
the source code needed to generate, install, and (for an executable
 | 
			
		||||
work) run the object code and to modify the work, including scripts to
 | 
			
		||||
control those activities.  However, it does not include the work's
 | 
			
		||||
System Libraries, or general-purpose tools or generally available free
 | 
			
		||||
programs which are used unmodified in performing those activities but
 | 
			
		||||
which are not part of the work.  For example, Corresponding Source
 | 
			
		||||
includes interface definition files associated with source files for
 | 
			
		||||
the work, and the source code for shared libraries and dynamically
 | 
			
		||||
linked subprograms that the work is specifically designed to require,
 | 
			
		||||
such as by intimate data communication or control flow between those
 | 
			
		||||
subprograms and other parts of the work.
 | 
			
		||||
 | 
			
		||||
  The Corresponding Source need not include anything that users
 | 
			
		||||
can regenerate automatically from other parts of the Corresponding
 | 
			
		||||
Source.
 | 
			
		||||
 | 
			
		||||
  The Corresponding Source for a work in source code form is that
 | 
			
		||||
same work.
 | 
			
		||||
 | 
			
		||||
  2. Basic Permissions.
 | 
			
		||||
 | 
			
		||||
  All rights granted under this License are granted for the term of
 | 
			
		||||
copyright on the Program, and are irrevocable provided the stated
 | 
			
		||||
conditions are met.  This License explicitly affirms your unlimited
 | 
			
		||||
permission to run the unmodified Program.  The output from running a
 | 
			
		||||
covered work is covered by this License only if the output, given its
 | 
			
		||||
content, constitutes a covered work.  This License acknowledges your
 | 
			
		||||
rights of fair use or other equivalent, as provided by copyright law.
 | 
			
		||||
 | 
			
		||||
  You may make, run and propagate covered works that you do not
 | 
			
		||||
convey, without conditions so long as your license otherwise remains
 | 
			
		||||
in force.  You may convey covered works to others for the sole purpose
 | 
			
		||||
of having them make modifications exclusively for you, or provide you
 | 
			
		||||
with facilities for running those works, provided that you comply with
 | 
			
		||||
the terms of this License in conveying all material for which you do
 | 
			
		||||
not control copyright.  Those thus making or running the covered works
 | 
			
		||||
for you must do so exclusively on your behalf, under your direction
 | 
			
		||||
and control, on terms that prohibit them from making any copies of
 | 
			
		||||
your copyrighted material outside their relationship with you.
 | 
			
		||||
 | 
			
		||||
  Conveying under any other circumstances is permitted solely under
 | 
			
		||||
the conditions stated below.  Sublicensing is not allowed; section 10
 | 
			
		||||
makes it unnecessary.
 | 
			
		||||
 | 
			
		||||
  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
 | 
			
		||||
 | 
			
		||||
  No covered work shall be deemed part of an effective technological
 | 
			
		||||
measure under any applicable law fulfilling obligations under article
 | 
			
		||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
 | 
			
		||||
similar laws prohibiting or restricting circumvention of such
 | 
			
		||||
measures.
 | 
			
		||||
 | 
			
		||||
  When you convey a covered work, you waive any legal power to forbid
 | 
			
		||||
circumvention of technological measures to the extent such circumvention
 | 
			
		||||
is effected by exercising rights under this License with respect to
 | 
			
		||||
the covered work, and you disclaim any intention to limit operation or
 | 
			
		||||
modification of the work as a means of enforcing, against the work's
 | 
			
		||||
users, your or third parties' legal rights to forbid circumvention of
 | 
			
		||||
technological measures.
 | 
			
		||||
 | 
			
		||||
  4. Conveying Verbatim Copies.
 | 
			
		||||
 | 
			
		||||
  You may convey verbatim copies of the Program's source code as you
 | 
			
		||||
receive it, in any medium, provided that you conspicuously and
 | 
			
		||||
appropriately publish on each copy an appropriate copyright notice;
 | 
			
		||||
keep intact all notices stating that this License and any
 | 
			
		||||
non-permissive terms added in accord with section 7 apply to the code;
 | 
			
		||||
keep intact all notices of the absence of any warranty; and give all
 | 
			
		||||
recipients a copy of this License along with the Program.
 | 
			
		||||
 | 
			
		||||
  You may charge any price or no price for each copy that you convey,
 | 
			
		||||
and you may offer support or warranty protection for a fee.
 | 
			
		||||
 | 
			
		||||
  5. Conveying Modified Source Versions.
 | 
			
		||||
 | 
			
		||||
  You may convey a work based on the Program, or the modifications to
 | 
			
		||||
produce it from the Program, in the form of source code under the
 | 
			
		||||
terms of section 4, provided that you also meet all of these conditions:
 | 
			
		||||
 | 
			
		||||
    a) The work must carry prominent notices stating that you modified
 | 
			
		||||
    it, and giving a relevant date.
 | 
			
		||||
 | 
			
		||||
    b) The work must carry prominent notices stating that it is
 | 
			
		||||
    released under this License and any conditions added under section
 | 
			
		||||
    7.  This requirement modifies the requirement in section 4 to
 | 
			
		||||
    "keep intact all notices".
 | 
			
		||||
 | 
			
		||||
    c) You must license the entire work, as a whole, under this
 | 
			
		||||
    License to anyone who comes into possession of a copy.  This
 | 
			
		||||
    License will therefore apply, along with any applicable section 7
 | 
			
		||||
    additional terms, to the whole of the work, and all its parts,
 | 
			
		||||
    regardless of how they are packaged.  This License gives no
 | 
			
		||||
    permission to license the work in any other way, but it does not
 | 
			
		||||
    invalidate such permission if you have separately received it.
 | 
			
		||||
 | 
			
		||||
    d) If the work has interactive user interfaces, each must display
 | 
			
		||||
    Appropriate Legal Notices; however, if the Program has interactive
 | 
			
		||||
    interfaces that do not display Appropriate Legal Notices, your
 | 
			
		||||
    work need not make them do so.
 | 
			
		||||
 | 
			
		||||
  A compilation of a covered work with other separate and independent
 | 
			
		||||
works, which are not by their nature extensions of the covered work,
 | 
			
		||||
and which are not combined with it such as to form a larger program,
 | 
			
		||||
in or on a volume of a storage or distribution medium, is called an
 | 
			
		||||
"aggregate" if the compilation and its resulting copyright are not
 | 
			
		||||
used to limit the access or legal rights of the compilation's users
 | 
			
		||||
beyond what the individual works permit.  Inclusion of a covered work
 | 
			
		||||
in an aggregate does not cause this License to apply to the other
 | 
			
		||||
parts of the aggregate.
 | 
			
		||||
 | 
			
		||||
  6. Conveying Non-Source Forms.
 | 
			
		||||
 | 
			
		||||
  You may convey a covered work in object code form under the terms
 | 
			
		||||
of sections 4 and 5, provided that you also convey the
 | 
			
		||||
machine-readable Corresponding Source under the terms of this License,
 | 
			
		||||
in one of these ways:
 | 
			
		||||
 | 
			
		||||
    a) Convey the object code in, or embodied in, a physical product
 | 
			
		||||
    (including a physical distribution medium), accompanied by the
 | 
			
		||||
    Corresponding Source fixed on a durable physical medium
 | 
			
		||||
    customarily used for software interchange.
 | 
			
		||||
 | 
			
		||||
    b) Convey the object code in, or embodied in, a physical product
 | 
			
		||||
    (including a physical distribution medium), accompanied by a
 | 
			
		||||
    written offer, valid for at least three years and valid for as
 | 
			
		||||
    long as you offer spare parts or customer support for that product
 | 
			
		||||
    model, to give anyone who possesses the object code either (1) a
 | 
			
		||||
    copy of the Corresponding Source for all the software in the
 | 
			
		||||
    product that is covered by this License, on a durable physical
 | 
			
		||||
    medium customarily used for software interchange, for a price no
 | 
			
		||||
    more than your reasonable cost of physically performing this
 | 
			
		||||
    conveying of source, or (2) access to copy the
 | 
			
		||||
    Corresponding Source from a network server at no charge.
 | 
			
		||||
 | 
			
		||||
    c) Convey individual copies of the object code with a copy of the
 | 
			
		||||
    written offer to provide the Corresponding Source.  This
 | 
			
		||||
    alternative is allowed only occasionally and noncommercially, and
 | 
			
		||||
    only if you received the object code with such an offer, in accord
 | 
			
		||||
    with subsection 6b.
 | 
			
		||||
 | 
			
		||||
    d) Convey the object code by offering access from a designated
 | 
			
		||||
    place (gratis or for a charge), and offer equivalent access to the
 | 
			
		||||
    Corresponding Source in the same way through the same place at no
 | 
			
		||||
    further charge.  You need not require recipients to copy the
 | 
			
		||||
    Corresponding Source along with the object code.  If the place to
 | 
			
		||||
    copy the object code is a network server, the Corresponding Source
 | 
			
		||||
    may be on a different server (operated by you or a third party)
 | 
			
		||||
    that supports equivalent copying facilities, provided you maintain
 | 
			
		||||
    clear directions next to the object code saying where to find the
 | 
			
		||||
    Corresponding Source.  Regardless of what server hosts the
 | 
			
		||||
    Corresponding Source, you remain obligated to ensure that it is
 | 
			
		||||
    available for as long as needed to satisfy these requirements.
 | 
			
		||||
 | 
			
		||||
    e) Convey the object code using peer-to-peer transmission, provided
 | 
			
		||||
    you inform other peers where the object code and Corresponding
 | 
			
		||||
    Source of the work are being offered to the general public at no
 | 
			
		||||
    charge under subsection 6d.
 | 
			
		||||
 | 
			
		||||
  A separable portion of the object code, whose source code is excluded
 | 
			
		||||
from the Corresponding Source as a System Library, need not be
 | 
			
		||||
included in conveying the object code work.
 | 
			
		||||
 | 
			
		||||
  A "User Product" is either (1) a "consumer product", which means any
 | 
			
		||||
tangible personal property which is normally used for personal, family,
 | 
			
		||||
or household purposes, or (2) anything designed or sold for incorporation
 | 
			
		||||
into a dwelling.  In determining whether a product is a consumer product,
 | 
			
		||||
doubtful cases shall be resolved in favor of coverage.  For a particular
 | 
			
		||||
product received by a particular user, "normally used" refers to a
 | 
			
		||||
typical or common use of that class of product, regardless of the status
 | 
			
		||||
of the particular user or of the way in which the particular user
 | 
			
		||||
actually uses, or expects or is expected to use, the product.  A product
 | 
			
		||||
is a consumer product regardless of whether the product has substantial
 | 
			
		||||
commercial, industrial or non-consumer uses, unless such uses represent
 | 
			
		||||
the only significant mode of use of the product.
 | 
			
		||||
 | 
			
		||||
  "Installation Information" for a User Product means any methods,
 | 
			
		||||
procedures, authorization keys, or other information required to install
 | 
			
		||||
and execute modified versions of a covered work in that User Product from
 | 
			
		||||
a modified version of its Corresponding Source.  The information must
 | 
			
		||||
suffice to ensure that the continued functioning of the modified object
 | 
			
		||||
code is in no case prevented or interfered with solely because
 | 
			
		||||
modification has been made.
 | 
			
		||||
 | 
			
		||||
  If you convey an object code work under this section in, or with, or
 | 
			
		||||
specifically for use in, a User Product, and the conveying occurs as
 | 
			
		||||
part of a transaction in which the right of possession and use of the
 | 
			
		||||
User Product is transferred to the recipient in perpetuity or for a
 | 
			
		||||
fixed term (regardless of how the transaction is characterized), the
 | 
			
		||||
Corresponding Source conveyed under this section must be accompanied
 | 
			
		||||
by the Installation Information.  But this requirement does not apply
 | 
			
		||||
if neither you nor any third party retains the ability to install
 | 
			
		||||
modified object code on the User Product (for example, the work has
 | 
			
		||||
been installed in ROM).
 | 
			
		||||
 | 
			
		||||
  The requirement to provide Installation Information does not include a
 | 
			
		||||
requirement to continue to provide support service, warranty, or updates
 | 
			
		||||
for a work that has been modified or installed by the recipient, or for
 | 
			
		||||
the User Product in which it has been modified or installed.  Access to a
 | 
			
		||||
network may be denied when the modification itself materially and
 | 
			
		||||
adversely affects the operation of the network or violates the rules and
 | 
			
		||||
protocols for communication across the network.
 | 
			
		||||
 | 
			
		||||
  Corresponding Source conveyed, and Installation Information provided,
 | 
			
		||||
in accord with this section must be in a format that is publicly
 | 
			
		||||
documented (and with an implementation available to the public in
 | 
			
		||||
source code form), and must require no special password or key for
 | 
			
		||||
unpacking, reading or copying.
 | 
			
		||||
 | 
			
		||||
  7. Additional Terms.
 | 
			
		||||
 | 
			
		||||
  "Additional permissions" are terms that supplement the terms of this
 | 
			
		||||
License by making exceptions from one or more of its conditions.
 | 
			
		||||
Additional permissions that are applicable to the entire Program shall
 | 
			
		||||
be treated as though they were included in this License, to the extent
 | 
			
		||||
that they are valid under applicable law.  If additional permissions
 | 
			
		||||
apply only to part of the Program, that part may be used separately
 | 
			
		||||
under those permissions, but the entire Program remains governed by
 | 
			
		||||
this License without regard to the additional permissions.
 | 
			
		||||
 | 
			
		||||
  When you convey a copy of a covered work, you may at your option
 | 
			
		||||
remove any additional permissions from that copy, or from any part of
 | 
			
		||||
it.  (Additional permissions may be written to require their own
 | 
			
		||||
removal in certain cases when you modify the work.)  You may place
 | 
			
		||||
additional permissions on material, added by you to a covered work,
 | 
			
		||||
for which you have or can give appropriate copyright permission.
 | 
			
		||||
 | 
			
		||||
  Notwithstanding any other provision of this License, for material you
 | 
			
		||||
add to a covered work, you may (if authorized by the copyright holders of
 | 
			
		||||
that material) supplement the terms of this License with terms:
 | 
			
		||||
 | 
			
		||||
    a) Disclaiming warranty or limiting liability differently from the
 | 
			
		||||
    terms of sections 15 and 16 of this License; or
 | 
			
		||||
 | 
			
		||||
    b) Requiring preservation of specified reasonable legal notices or
 | 
			
		||||
    author attributions in that material or in the Appropriate Legal
 | 
			
		||||
    Notices displayed by works containing it; or
 | 
			
		||||
 | 
			
		||||
    c) Prohibiting misrepresentation of the origin of that material, or
 | 
			
		||||
    requiring that modified versions of such material be marked in
 | 
			
		||||
    reasonable ways as different from the original version; or
 | 
			
		||||
 | 
			
		||||
    d) Limiting the use for publicity purposes of names of licensors or
 | 
			
		||||
    authors of the material; or
 | 
			
		||||
 | 
			
		||||
    e) Declining to grant rights under trademark law for use of some
 | 
			
		||||
    trade names, trademarks, or service marks; or
 | 
			
		||||
 | 
			
		||||
    f) Requiring indemnification of licensors and authors of that
 | 
			
		||||
    material by anyone who conveys the material (or modified versions of
 | 
			
		||||
    it) with contractual assumptions of liability to the recipient, for
 | 
			
		||||
    any liability that these contractual assumptions directly impose on
 | 
			
		||||
    those licensors and authors.
 | 
			
		||||
 | 
			
		||||
  All other non-permissive additional terms are considered "further
 | 
			
		||||
restrictions" within the meaning of section 10.  If the Program as you
 | 
			
		||||
received it, or any part of it, contains a notice stating that it is
 | 
			
		||||
governed by this License along with a term that is a further
 | 
			
		||||
restriction, you may remove that term.  If a license document contains
 | 
			
		||||
a further restriction but permits relicensing or conveying under this
 | 
			
		||||
License, you may add to a covered work material governed by the terms
 | 
			
		||||
of that license document, provided that the further restriction does
 | 
			
		||||
not survive such relicensing or conveying.
 | 
			
		||||
 | 
			
		||||
  If you add terms to a covered work in accord with this section, you
 | 
			
		||||
must place, in the relevant source files, a statement of the
 | 
			
		||||
additional terms that apply to those files, or a notice indicating
 | 
			
		||||
where to find the applicable terms.
 | 
			
		||||
 | 
			
		||||
  Additional terms, permissive or non-permissive, may be stated in the
 | 
			
		||||
form of a separately written license, or stated as exceptions;
 | 
			
		||||
the above requirements apply either way.
 | 
			
		||||
 | 
			
		||||
  8. Termination.
 | 
			
		||||
 | 
			
		||||
  You may not propagate or modify a covered work except as expressly
 | 
			
		||||
provided under this License.  Any attempt otherwise to propagate or
 | 
			
		||||
modify it is void, and will automatically terminate your rights under
 | 
			
		||||
this License (including any patent licenses granted under the third
 | 
			
		||||
paragraph of section 11).
 | 
			
		||||
 | 
			
		||||
  However, if you cease all violation of this License, then your
 | 
			
		||||
license from a particular copyright holder is reinstated (a)
 | 
			
		||||
provisionally, unless and until the copyright holder explicitly and
 | 
			
		||||
finally terminates your license, and (b) permanently, if the copyright
 | 
			
		||||
holder fails to notify you of the violation by some reasonable means
 | 
			
		||||
prior to 60 days after the cessation.
 | 
			
		||||
 | 
			
		||||
  Moreover, your license from a particular copyright holder is
 | 
			
		||||
reinstated permanently if the copyright holder notifies you of the
 | 
			
		||||
violation by some reasonable means, this is the first time you have
 | 
			
		||||
received notice of violation of this License (for any work) from that
 | 
			
		||||
copyright holder, and you cure the violation prior to 30 days after
 | 
			
		||||
your receipt of the notice.
 | 
			
		||||
 | 
			
		||||
  Termination of your rights under this section does not terminate the
 | 
			
		||||
licenses of parties who have received copies or rights from you under
 | 
			
		||||
this License.  If your rights have been terminated and not permanently
 | 
			
		||||
reinstated, you do not qualify to receive new licenses for the same
 | 
			
		||||
material under section 10.
 | 
			
		||||
 | 
			
		||||
  9. Acceptance Not Required for Having Copies.
 | 
			
		||||
 | 
			
		||||
  You are not required to accept this License in order to receive or
 | 
			
		||||
run a copy of the Program.  Ancillary propagation of a covered work
 | 
			
		||||
occurring solely as a consequence of using peer-to-peer transmission
 | 
			
		||||
to receive a copy likewise does not require acceptance.  However,
 | 
			
		||||
nothing other than this License grants you permission to propagate or
 | 
			
		||||
modify any covered work.  These actions infringe copyright if you do
 | 
			
		||||
not accept this License.  Therefore, by modifying or propagating a
 | 
			
		||||
covered work, you indicate your acceptance of this License to do so.
 | 
			
		||||
 | 
			
		||||
  10. Automatic Licensing of Downstream Recipients.
 | 
			
		||||
 | 
			
		||||
  Each time you convey a covered work, the recipient automatically
 | 
			
		||||
receives a license from the original licensors, to run, modify and
 | 
			
		||||
propagate that work, subject to this License.  You are not responsible
 | 
			
		||||
for enforcing compliance by third parties with this License.
 | 
			
		||||
 | 
			
		||||
  An "entity transaction" is a transaction transferring control of an
 | 
			
		||||
organization, or substantially all assets of one, or subdividing an
 | 
			
		||||
organization, or merging organizations.  If propagation of a covered
 | 
			
		||||
work results from an entity transaction, each party to that
 | 
			
		||||
transaction who receives a copy of the work also receives whatever
 | 
			
		||||
licenses to the work the party's predecessor in interest had or could
 | 
			
		||||
give under the previous paragraph, plus a right to possession of the
 | 
			
		||||
Corresponding Source of the work from the predecessor in interest, if
 | 
			
		||||
the predecessor has it or can get it with reasonable efforts.
 | 
			
		||||
 | 
			
		||||
  You may not impose any further restrictions on the exercise of the
 | 
			
		||||
rights granted or affirmed under this License.  For example, you may
 | 
			
		||||
not impose a license fee, royalty, or other charge for exercise of
 | 
			
		||||
rights granted under this License, and you may not initiate litigation
 | 
			
		||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
 | 
			
		||||
any patent claim is infringed by making, using, selling, offering for
 | 
			
		||||
sale, or importing the Program or any portion of it.
 | 
			
		||||
 | 
			
		||||
  11. Patents.
 | 
			
		||||
 | 
			
		||||
  A "contributor" is a copyright holder who authorizes use under this
 | 
			
		||||
License of the Program or a work on which the Program is based.  The
 | 
			
		||||
work thus licensed is called the contributor's "contributor version".
 | 
			
		||||
 | 
			
		||||
  A contributor's "essential patent claims" are all patent claims
 | 
			
		||||
owned or controlled by the contributor, whether already acquired or
 | 
			
		||||
hereafter acquired, that would be infringed by some manner, permitted
 | 
			
		||||
by this License, of making, using, or selling its contributor version,
 | 
			
		||||
but do not include claims that would be infringed only as a
 | 
			
		||||
consequence of further modification of the contributor version.  For
 | 
			
		||||
purposes of this definition, "control" includes the right to grant
 | 
			
		||||
patent sublicenses in a manner consistent with the requirements of
 | 
			
		||||
                    GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 | 
			
		||||
 | 
			
		||||
  0. This License applies to any program or other work which contains
 | 
			
		||||
a notice placed by the copyright holder saying it may be distributed
 | 
			
		||||
under the terms of this General Public License.  The "Program", below,
 | 
			
		||||
refers to any such program or work, and a "work based on the Program"
 | 
			
		||||
means either the Program or any derivative work under copyright law:
 | 
			
		||||
that is to say, a work containing the Program or a portion of it,
 | 
			
		||||
either verbatim or with modifications and/or translated into another
 | 
			
		||||
language.  (Hereinafter, translation is included without limitation in
 | 
			
		||||
the term "modification".)  Each licensee is addressed as "you".
 | 
			
		||||
 | 
			
		||||
Activities other than copying, distribution and modification are not
 | 
			
		||||
covered by this License; they are outside its scope.  The act of
 | 
			
		||||
running the Program is not restricted, and the output from the Program
 | 
			
		||||
is covered only if its contents constitute a work based on the
 | 
			
		||||
Program (independent of having been made by running the Program).
 | 
			
		||||
Whether that is true depends on what the Program does.
 | 
			
		||||
 | 
			
		||||
  1. You may copy and distribute verbatim copies of the Program's
 | 
			
		||||
source code as you receive it, in any medium, provided that you
 | 
			
		||||
conspicuously and appropriately publish on each copy an appropriate
 | 
			
		||||
copyright notice and disclaimer of warranty; keep intact all the
 | 
			
		||||
notices that refer to this License and to the absence of any warranty;
 | 
			
		||||
and give any other recipients of the Program a copy of this License
 | 
			
		||||
along with the Program.
 | 
			
		||||
 | 
			
		||||
You may charge a fee for the physical act of transferring a copy, and
 | 
			
		||||
you may at your option offer warranty protection in exchange for a fee.
 | 
			
		||||
 | 
			
		||||
  2. You may modify your copy or copies of the Program or any portion
 | 
			
		||||
of it, thus forming a work based on the Program, and copy and
 | 
			
		||||
distribute such modifications or work under the terms of Section 1
 | 
			
		||||
above, provided that you also meet all of these conditions:
 | 
			
		||||
 | 
			
		||||
    a) You must cause the modified files to carry prominent notices
 | 
			
		||||
    stating that you changed the files and the date of any change.
 | 
			
		||||
 | 
			
		||||
    b) You must cause any work that you distribute or publish, that in
 | 
			
		||||
    whole or in part contains or is derived from the Program or any
 | 
			
		||||
    part thereof, to be licensed as a whole at no charge to all third
 | 
			
		||||
    parties under the terms of this License.
 | 
			
		||||
 | 
			
		||||
    c) If the modified program normally reads commands interactively
 | 
			
		||||
    when run, you must cause it, when started running for such
 | 
			
		||||
    interactive use in the most ordinary way, to print or display an
 | 
			
		||||
    announcement including an appropriate copyright notice and a
 | 
			
		||||
    notice that there is no warranty (or else, saying that you provide
 | 
			
		||||
    a warranty) and that users may redistribute the program under
 | 
			
		||||
    these conditions, and telling the user how to view a copy of this
 | 
			
		||||
    License.  (Exception: if the Program itself is interactive but
 | 
			
		||||
    does not normally print such an announcement, your work based on
 | 
			
		||||
    the Program is not required to print an announcement.)
 | 
			
		||||
 | 
			
		||||
These requirements apply to the modified work as a whole.  If
 | 
			
		||||
identifiable sections of that work are not derived from the Program,
 | 
			
		||||
and can be reasonably considered independent and separate works in
 | 
			
		||||
themselves, then this License, and its terms, do not apply to those
 | 
			
		||||
sections when you distribute them as separate works.  But when you
 | 
			
		||||
distribute the same sections as part of a whole which is a work based
 | 
			
		||||
on the Program, the distribution of the whole must be on the terms of
 | 
			
		||||
this License, whose permissions for other licensees extend to the
 | 
			
		||||
entire whole, and thus to each and every part regardless of who wrote it.
 | 
			
		||||
 | 
			
		||||
Thus, it is not the intent of this section to claim rights or contest
 | 
			
		||||
your rights to work written entirely by you; rather, the intent is to
 | 
			
		||||
exercise the right to control the distribution of derivative or
 | 
			
		||||
collective works based on the Program.
 | 
			
		||||
 | 
			
		||||
In addition, mere aggregation of another work not based on the Program
 | 
			
		||||
with the Program (or with a work based on the Program) on a volume of
 | 
			
		||||
a storage or distribution medium does not bring the other work under
 | 
			
		||||
the scope of this License.
 | 
			
		||||
 | 
			
		||||
  3. You may copy and distribute the Program (or a work based on it,
 | 
			
		||||
under Section 2) in object code or executable form under the terms of
 | 
			
		||||
Sections 1 and 2 above provided that you also do one of the following:
 | 
			
		||||
 | 
			
		||||
    a) Accompany it with the complete corresponding machine-readable
 | 
			
		||||
    source code, which must be distributed under the terms of Sections
 | 
			
		||||
    1 and 2 above on a medium customarily used for software interchange; or,
 | 
			
		||||
 | 
			
		||||
    b) Accompany it with a written offer, valid for at least three
 | 
			
		||||
    years, to give any third party, for a charge no more than your
 | 
			
		||||
    cost of physically performing source distribution, a complete
 | 
			
		||||
    machine-readable copy of the corresponding source code, to be
 | 
			
		||||
    distributed under the terms of Sections 1 and 2 above on a medium
 | 
			
		||||
    customarily used for software interchange; or,
 | 
			
		||||
 | 
			
		||||
    c) Accompany it with the information you received as to the offer
 | 
			
		||||
    to distribute corresponding source code.  (This alternative is
 | 
			
		||||
    allowed only for noncommercial distribution and only if you
 | 
			
		||||
    received the program in object code or executable form with such
 | 
			
		||||
    an offer, in accord with Subsection b above.)
 | 
			
		||||
 | 
			
		||||
The source code for a work means the preferred form of the work for
 | 
			
		||||
making modifications to it.  For an executable work, complete source
 | 
			
		||||
code means all the source code for all modules it contains, plus any
 | 
			
		||||
associated interface definition files, plus the scripts used to
 | 
			
		||||
control compilation and installation of the executable.  However, as a
 | 
			
		||||
special exception, the source code distributed need not include
 | 
			
		||||
anything that is normally distributed (in either source or binary
 | 
			
		||||
form) with the major components (compiler, kernel, and so on) of the
 | 
			
		||||
operating system on which the executable runs, unless that component
 | 
			
		||||
itself accompanies the executable.
 | 
			
		||||
 | 
			
		||||
If distribution of executable or object code is made by offering
 | 
			
		||||
access to copy from a designated place, then offering equivalent
 | 
			
		||||
access to copy the source code from the same place counts as
 | 
			
		||||
distribution of the source code, even though third parties are not
 | 
			
		||||
compelled to copy the source along with the object code.
 | 
			
		||||
 | 
			
		||||
  4. You may not copy, modify, sublicense, or distribute the Program
 | 
			
		||||
except as expressly provided under this License.  Any attempt
 | 
			
		||||
otherwise to copy, modify, sublicense or distribute the Program is
 | 
			
		||||
void, and will automatically terminate your rights under this License.
 | 
			
		||||
However, parties who have received copies, or rights, from you under
 | 
			
		||||
this License will not have their licenses terminated so long as such
 | 
			
		||||
parties remain in full compliance.
 | 
			
		||||
 | 
			
		||||
  5. You are not required to accept this License, since you have not
 | 
			
		||||
signed it.  However, nothing else grants you permission to modify or
 | 
			
		||||
distribute the Program or its derivative works.  These actions are
 | 
			
		||||
prohibited by law if you do not accept this License.  Therefore, by
 | 
			
		||||
modifying or distributing the Program (or any work based on the
 | 
			
		||||
Program), you indicate your acceptance of this License to do so, and
 | 
			
		||||
all its terms and conditions for copying, distributing or modifying
 | 
			
		||||
the Program or works based on it.
 | 
			
		||||
 | 
			
		||||
  6. Each time you redistribute the Program (or any work based on the
 | 
			
		||||
Program), the recipient automatically receives a license from the
 | 
			
		||||
original licensor to copy, distribute or modify the Program subject to
 | 
			
		||||
these terms and conditions.  You may not impose any further
 | 
			
		||||
restrictions on the recipients' exercise of the rights granted herein.
 | 
			
		||||
You are not responsible for enforcing compliance by third parties to
 | 
			
		||||
this License.
 | 
			
		||||
 | 
			
		||||
  Each contributor grants you a non-exclusive, worldwide, royalty-free
 | 
			
		||||
patent license under the contributor's essential patent claims, to
 | 
			
		||||
make, use, sell, offer for sale, import and otherwise run, modify and
 | 
			
		||||
propagate the contents of its contributor version.
 | 
			
		||||
 | 
			
		||||
  In the following three paragraphs, a "patent license" is any express
 | 
			
		||||
agreement or commitment, however denominated, not to enforce a patent
 | 
			
		||||
(such as an express permission to practice a patent or covenant not to
 | 
			
		||||
sue for patent infringement).  To "grant" such a patent license to a
 | 
			
		||||
party means to make such an agreement or commitment not to enforce a
 | 
			
		||||
patent against the party.
 | 
			
		||||
 | 
			
		||||
  If you convey a covered work, knowingly relying on a patent license,
 | 
			
		||||
and the Corresponding Source of the work is not available for anyone
 | 
			
		||||
to copy, free of charge and under the terms of this License, through a
 | 
			
		||||
publicly available network server or other readily accessible means,
 | 
			
		||||
then you must either (1) cause the Corresponding Source to be so
 | 
			
		||||
available, or (2) arrange to deprive yourself of the benefit of the
 | 
			
		||||
patent license for this particular work, or (3) arrange, in a manner
 | 
			
		||||
consistent with the requirements of this License, to extend the patent
 | 
			
		||||
license to downstream recipients.  "Knowingly relying" means you have
 | 
			
		||||
actual knowledge that, but for the patent license, your conveying the
 | 
			
		||||
covered work in a country, or your recipient's use of the covered work
 | 
			
		||||
in a country, would infringe one or more identifiable patents in that
 | 
			
		||||
country that you have reason to believe are valid.
 | 
			
		||||
 | 
			
		||||
  If, pursuant to or in connection with a single transaction or
 | 
			
		||||
arrangement, you convey, or propagate by procuring conveyance of, a
 | 
			
		||||
covered work, and grant a patent license to some of the parties
 | 
			
		||||
receiving the covered work authorizing them to use, propagate, modify
 | 
			
		||||
or convey a specific copy of the covered work, then the patent license
 | 
			
		||||
you grant is automatically extended to all recipients of the covered
 | 
			
		||||
work and works based on it.
 | 
			
		||||
 | 
			
		||||
  A patent license is "discriminatory" if it does not include within
 | 
			
		||||
the scope of its coverage, prohibits the exercise of, or is
 | 
			
		||||
conditioned on the non-exercise of one or more of the rights that are
 | 
			
		||||
specifically granted under this License.  You may not convey a covered
 | 
			
		||||
work if you are a party to an arrangement with a third party that is
 | 
			
		||||
in the business of distributing software, under which you make payment
 | 
			
		||||
to the third party based on the extent of your activity of conveying
 | 
			
		||||
the work, and under which the third party grants, to any of the
 | 
			
		||||
parties who would receive the covered work from you, a discriminatory
 | 
			
		||||
patent license (a) in connection with copies of the covered work
 | 
			
		||||
conveyed by you (or copies made from those copies), or (b) primarily
 | 
			
		||||
for and in connection with specific products or compilations that
 | 
			
		||||
contain the covered work, unless you entered into that arrangement,
 | 
			
		||||
or that patent license was granted, prior to 28 March 2007.
 | 
			
		||||
 | 
			
		||||
  Nothing in this License shall be construed as excluding or limiting
 | 
			
		||||
any implied license or other defenses to infringement that may
 | 
			
		||||
otherwise be available to you under applicable patent law.
 | 
			
		||||
 | 
			
		||||
  12. No Surrender of Others' Freedom.
 | 
			
		||||
 | 
			
		||||
  If conditions are imposed on you (whether by court order, agreement or
 | 
			
		||||
  7. If, as a consequence of a court judgment or allegation of patent
 | 
			
		||||
infringement or for any other reason (not limited to patent issues),
 | 
			
		||||
conditions are imposed on you (whether by court order, agreement or
 | 
			
		||||
otherwise) that contradict the conditions of this License, they do not
 | 
			
		||||
excuse you from the conditions of this License.  If you cannot convey a
 | 
			
		||||
covered work so as to satisfy simultaneously your obligations under this
 | 
			
		||||
License and any other pertinent obligations, then as a consequence you may
 | 
			
		||||
not convey it at all.  For example, if you agree to terms that obligate you
 | 
			
		||||
to collect a royalty for further conveying from those to whom you convey
 | 
			
		||||
the Program, the only way you could satisfy both those terms and this
 | 
			
		||||
License would be to refrain entirely from conveying the Program.
 | 
			
		||||
excuse you from the conditions of this License.  If you cannot
 | 
			
		||||
distribute so as to satisfy simultaneously your obligations under this
 | 
			
		||||
License and any other pertinent obligations, then as a consequence you
 | 
			
		||||
may not distribute the Program at all.  For example, if a patent
 | 
			
		||||
license would not permit royalty-free redistribution of the Program by
 | 
			
		||||
all those who receive copies directly or indirectly through you, then
 | 
			
		||||
the only way you could satisfy both it and this License would be to
 | 
			
		||||
refrain entirely from distribution of the Program.
 | 
			
		||||
 | 
			
		||||
  13. Use with the GNU Affero General Public License.
 | 
			
		||||
If any portion of this section is held invalid or unenforceable under
 | 
			
		||||
any particular circumstance, the balance of the section is intended to
 | 
			
		||||
apply and the section as a whole is intended to apply in other
 | 
			
		||||
circumstances.
 | 
			
		||||
 | 
			
		||||
  Notwithstanding any other provision of this License, you have
 | 
			
		||||
permission to link or combine any covered work with a work licensed
 | 
			
		||||
under version 3 of the GNU Affero General Public License into a single
 | 
			
		||||
combined work, and to convey the resulting work.  The terms of this
 | 
			
		||||
License will continue to apply to the part which is the covered work,
 | 
			
		||||
but the special requirements of the GNU Affero General Public License,
 | 
			
		||||
section 13, concerning interaction through a network will apply to the
 | 
			
		||||
combination as such.
 | 
			
		||||
It is not the purpose of this section to induce you to infringe any
 | 
			
		||||
patents or other property right claims or to contest validity of any
 | 
			
		||||
such claims; this section has the sole purpose of protecting the
 | 
			
		||||
integrity of the free software distribution system, which is
 | 
			
		||||
implemented by public license practices.  Many people have made
 | 
			
		||||
generous contributions to the wide range of software distributed
 | 
			
		||||
through that system in reliance on consistent application of that
 | 
			
		||||
system; it is up to the author/donor to decide if he or she is willing
 | 
			
		||||
to distribute software through any other system and a licensee cannot
 | 
			
		||||
impose that choice.
 | 
			
		||||
 | 
			
		||||
  14. Revised Versions of this License.
 | 
			
		||||
This section is intended to make thoroughly clear what is believed to
 | 
			
		||||
be a consequence of the rest of this License.
 | 
			
		||||
 | 
			
		||||
  The Free Software Foundation may publish revised and/or new versions of
 | 
			
		||||
the GNU General Public License from time to time.  Such new versions will
 | 
			
		||||
  8. If the distribution and/or use of the Program is restricted in
 | 
			
		||||
certain countries either by patents or by copyrighted interfaces, the
 | 
			
		||||
original copyright holder who places the Program under this License
 | 
			
		||||
may add an explicit geographical distribution limitation excluding
 | 
			
		||||
those countries, so that distribution is permitted only in or among
 | 
			
		||||
countries not thus excluded.  In such case, this License incorporates
 | 
			
		||||
the limitation as if written in the body of this License.
 | 
			
		||||
 | 
			
		||||
  9. The Free Software Foundation may publish revised and/or new versions
 | 
			
		||||
of the General Public License from time to time.  Such new versions will
 | 
			
		||||
be similar in spirit to the present version, but may differ in detail to
 | 
			
		||||
address new problems or concerns.
 | 
			
		||||
 | 
			
		||||
  Each version is given a distinguishing version number.  If the
 | 
			
		||||
Program specifies that a certain numbered version of the GNU General
 | 
			
		||||
Public License "or any later version" applies to it, you have the
 | 
			
		||||
option of following the terms and conditions either of that numbered
 | 
			
		||||
version or of any later version published by the Free Software
 | 
			
		||||
Foundation.  If the Program does not specify a version number of the
 | 
			
		||||
GNU General Public License, you may choose any version ever published
 | 
			
		||||
by the Free Software Foundation.
 | 
			
		||||
Each version is given a distinguishing version number.  If the Program
 | 
			
		||||
specifies a version number of this License which applies to it and "any
 | 
			
		||||
later version", you have the option of following the terms and conditions
 | 
			
		||||
either of that version or of any later version published by the Free
 | 
			
		||||
Software Foundation.  If the Program does not specify a version number of
 | 
			
		||||
this License, you may choose any version ever published by the Free Software
 | 
			
		||||
Foundation.
 | 
			
		||||
 | 
			
		||||
  If the Program specifies that a proxy can decide which future
 | 
			
		||||
versions of the GNU General Public License can be used, that proxy's
 | 
			
		||||
public statement of acceptance of a version permanently authorizes you
 | 
			
		||||
to choose that version for the Program.
 | 
			
		||||
  10. If you wish to incorporate parts of the Program into other free
 | 
			
		||||
programs whose distribution conditions are different, write to the author
 | 
			
		||||
to ask for permission.  For software which is copyrighted by the Free
 | 
			
		||||
Software Foundation, write to the Free Software Foundation; we sometimes
 | 
			
		||||
make exceptions for this.  Our decision will be guided by the two goals
 | 
			
		||||
of preserving the free status of all derivatives of our free software and
 | 
			
		||||
of promoting the sharing and reuse of software generally.
 | 
			
		||||
 | 
			
		||||
  Later license versions may give you additional or different
 | 
			
		||||
permissions.  However, no additional obligations are imposed on any
 | 
			
		||||
author or copyright holder as a result of your choosing to follow a
 | 
			
		||||
later version.
 | 
			
		||||
                            NO WARRANTY
 | 
			
		||||
 | 
			
		||||
  15. Disclaimer of Warranty.
 | 
			
		||||
  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
 | 
			
		||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
 | 
			
		||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
 | 
			
		||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
 | 
			
		||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 | 
			
		||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
 | 
			
		||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
 | 
			
		||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
 | 
			
		||||
REPAIR OR CORRECTION.
 | 
			
		||||
 | 
			
		||||
  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
 | 
			
		||||
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
 | 
			
		||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
 | 
			
		||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
 | 
			
		||||
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
 | 
			
		||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
 | 
			
		||||
 | 
			
		||||
  16. Limitation of Liability.
 | 
			
		||||
 | 
			
		||||
  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 | 
			
		||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
 | 
			
		||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
 | 
			
		||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
 | 
			
		||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
 | 
			
		||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
 | 
			
		||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
 | 
			
		||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
 | 
			
		||||
SUCH DAMAGES.
 | 
			
		||||
 | 
			
		||||
  17. Interpretation of Sections 15 and 16.
 | 
			
		||||
 | 
			
		||||
  If the disclaimer of warranty and limitation of liability provided
 | 
			
		||||
above cannot be given local legal effect according to their terms,
 | 
			
		||||
reviewing courts shall apply local law that most closely approximates
 | 
			
		||||
an absolute waiver of all civil liability in connection with the
 | 
			
		||||
Program, unless a warranty or assumption of liability accompanies a
 | 
			
		||||
copy of the Program in return for a fee.
 | 
			
		||||
  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 | 
			
		||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
 | 
			
		||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
 | 
			
		||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
 | 
			
		||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
 | 
			
		||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
 | 
			
		||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
 | 
			
		||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
 | 
			
		||||
POSSIBILITY OF SUCH DAMAGES.
 | 
			
		||||
 | 
			
		||||
                     END OF TERMS AND CONDITIONS
 | 
			
		||||
 | 
			
		||||
@@ -628,15 +287,15 @@ free software which everyone can redistribute and change under these terms.
 | 
			
		||||
 | 
			
		||||
  To do so, attach the following notices to the program.  It is safest
 | 
			
		||||
to attach them to the start of each source file to most effectively
 | 
			
		||||
state the exclusion of warranty; and each file should have at least
 | 
			
		||||
convey the exclusion of warranty; and each file should have at least
 | 
			
		||||
the "copyright" line and a pointer to where the full notice is found.
 | 
			
		||||
 | 
			
		||||
    <one line to give the program's name and a brief idea of what it does.>
 | 
			
		||||
    Copyright (C) <year>  <name of author>
 | 
			
		||||
 | 
			
		||||
    This program is free software: you can redistribute it and/or modify
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
@@ -644,31 +303,38 @@ the "copyright" line and a pointer to where the full notice is found.
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
Also add information on how to contact you by electronic and paper mail.
 | 
			
		||||
 | 
			
		||||
  If the program does terminal interaction, make it output a short
 | 
			
		||||
notice like this when it starts in an interactive mode:
 | 
			
		||||
If the program is interactive, make it output a short notice like this
 | 
			
		||||
when it starts in an interactive mode:
 | 
			
		||||
 | 
			
		||||
    <program>  Copyright (C) <year>  <name of author>
 | 
			
		||||
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
 | 
			
		||||
    Gnomovision version 69, Copyright (C) year name of author
 | 
			
		||||
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
 | 
			
		||||
    This is free software, and you are welcome to redistribute it
 | 
			
		||||
    under certain conditions; type `show c' for details.
 | 
			
		||||
 | 
			
		||||
The hypothetical commands `show w' and `show c' should show the appropriate
 | 
			
		||||
parts of the General Public License.  Of course, your program's commands
 | 
			
		||||
might be different; for a GUI interface, you would use an "about box".
 | 
			
		||||
parts of the General Public License.  Of course, the commands you use may
 | 
			
		||||
be called something other than `show w' and `show c'; they could even be
 | 
			
		||||
mouse-clicks or menu items--whatever suits your program.
 | 
			
		||||
 | 
			
		||||
  You should also get your employer (if you work as a programmer) or school,
 | 
			
		||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
 | 
			
		||||
For more information on this, and how to apply and follow the GNU GPL, see
 | 
			
		||||
<http://www.gnu.org/licenses/>.
 | 
			
		||||
You should also get your employer (if you work as a programmer) or your
 | 
			
		||||
school, if any, to sign a "copyright disclaimer" for the program, if
 | 
			
		||||
necessary.  Here is a sample; alter the names:
 | 
			
		||||
 | 
			
		||||
  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
 | 
			
		||||
  `Gnomovision' (which makes passes at compilers) written by James Hacker.
 | 
			
		||||
 | 
			
		||||
  <signature of Ty Coon>, 1 April 1989
 | 
			
		||||
  Ty Coon, President of Vice
 | 
			
		||||
 | 
			
		||||
This General Public License does not permit incorporating your program into
 | 
			
		||||
proprietary programs.  If your program is a subroutine library, you may
 | 
			
		||||
consider it more useful to permit linking proprietary applications with the
 | 
			
		||||
library.  If this is what you want to do, use the GNU Lesser General
 | 
			
		||||
Public License instead of this License.
 | 
			
		||||
 | 
			
		||||
  The GNU General Public License does not permit incorporating your program
 | 
			
		||||
into proprietary programs.  If your program is a subroutine library, you
 | 
			
		||||
may consider it more useful to permit linking proprietary applications with
 | 
			
		||||
the library.  If this is what you want to do, use the GNU Lesser General
 | 
			
		||||
Public License instead of this License.  But first, please read
 | 
			
		||||
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										10
									
								
								LICENSE
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								LICENSE
									
									
									
									
									
								
							@@ -1,7 +1,7 @@
 | 
			
		||||
GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                   GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                       Version 2, June 1991
 | 
			
		||||
 | 
			
		||||
 Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
 | 
			
		||||
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 | 
			
		||||
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
			
		||||
 Everyone is permitted to copy and distribute verbatim copies
 | 
			
		||||
 of this license document, but changing it is not allowed.
 | 
			
		||||
@@ -290,8 +290,8 @@ to attach them to the start of each source file to most effectively
 | 
			
		||||
convey the exclusion of warranty; and each file should have at least
 | 
			
		||||
the "copyright" line and a pointer to where the full notice is found.
 | 
			
		||||
 | 
			
		||||
    {description}
 | 
			
		||||
    Copyright (C) {year}  {fullname}
 | 
			
		||||
    <one line to give the program's name and a brief idea of what it does.>
 | 
			
		||||
    Copyright (C) <year>  <name of author>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
@@ -329,7 +329,7 @@ necessary.  Here is a sample; alter the names:
 | 
			
		||||
  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
 | 
			
		||||
  `Gnomovision' (which makes passes at compilers) written by James Hacker.
 | 
			
		||||
 | 
			
		||||
  {signature of Ty Coon}, 1 April 1989
 | 
			
		||||
  <signature of Ty Coon>, 1 April 1989
 | 
			
		||||
  Ty Coon, President of Vice
 | 
			
		||||
 | 
			
		||||
This General Public License does not permit incorporating your program into
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										18
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								README.md
									
									
									
									
									
								
							@@ -1,6 +1,8 @@
 | 
			
		||||
# Grid
 | 
			
		||||
# Grid [](https://travis-ci.org/paboyle/Grid)
 | 
			
		||||
Data parallel C++ mathematical object library
 | 
			
		||||
 | 
			
		||||
Last update 2015/7/30
 | 
			
		||||
 | 
			
		||||
This library provides data parallel C++ container classes with internal memory layout
 | 
			
		||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
 | 
			
		||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
 | 
			
		||||
@@ -20,16 +22,16 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
 | 
			
		||||
for most programmers.
 | 
			
		||||
 | 
			
		||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
 | 
			
		||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
 | 
			
		||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON on the way).
 | 
			
		||||
 | 
			
		||||
These are presented as 
 | 
			
		||||
 | 
			
		||||
  vRealF, vRealD, vComplexF, vComplexD 
 | 
			
		||||
     vRealF, vRealD, vComplexF, vComplexD 
 | 
			
		||||
 | 
			
		||||
internal vector data types. These may be useful in themselves for other programmers.
 | 
			
		||||
The corresponding scalar types are named
 | 
			
		||||
 | 
			
		||||
  RealF, RealD, ComplexF, ComplexD
 | 
			
		||||
     RealF, RealD, ComplexF, ComplexD
 | 
			
		||||
 | 
			
		||||
MPI, OpenMP, and SIMD parallelism are present in the library.
 | 
			
		||||
 | 
			
		||||
@@ -39,12 +41,18 @@ are examples:
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
 | 
			
		||||
 | 
			
		||||
     ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
 | 
			
		||||
     
 | 
			
		||||
Note: Before running configure it could be necessary to execute the script 
 | 
			
		||||
       
 | 
			
		||||
       script/filelist
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
     
 | 
			
		||||
For developers:
 | 
			
		||||
Use reconfigure_script in the scripts/ directory to create the autotools environment 
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										22
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								TODO
									
									
									
									
									
								
							@@ -1,5 +1,27 @@
 | 
			
		||||
TODO:
 | 
			
		||||
---------------
 | 
			
		||||
 | 
			
		||||
* Forces; the UdSdU  term in gauge force term is half of what I think it should
 | 
			
		||||
  be. This is a consequence of taking ONLY the first term in:
 | 
			
		||||
 | 
			
		||||
  dSg/dt = dU/dt dSdU + dUdag/dt dSdUdag
 | 
			
		||||
 | 
			
		||||
  in the fermion force.
 | 
			
		||||
 | 
			
		||||
  Now, S_mom = - tr Pmu Pmu      ; Pmu anti-herm
 | 
			
		||||
 | 
			
		||||
                                  .
 | 
			
		||||
       d Smom/dt = - 2.0 tr Pmu Pmu   = - dSg/dt = - tr Pmu [Umu dSdUmu + UmuDag dSdUmuDag]
 | 
			
		||||
 | 
			
		||||
           .
 | 
			
		||||
       => Pmu =  Umu dSdUmu
 | 
			
		||||
 | 
			
		||||
       Where the norm is half expected.
 | 
			
		||||
 | 
			
		||||
  This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
 | 
			
		||||
  This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Policies:
 | 
			
		||||
 | 
			
		||||
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_comms.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_dwf.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
@@ -16,15 +44,21 @@ struct scal {
 | 
			
		||||
    Gamma::GammaT
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
bool overlapComms = false;
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
 | 
			
		||||
    overlapComms = true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  std::vector<int> latt4 = GridDefaultLatt();
 | 
			
		||||
  const int Ls=16;
 | 
			
		||||
  const int Ls=8;
 | 
			
		||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
@@ -79,10 +113,16 @@ int main (int argc, char ** argv)
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
 | 
			
		||||
  typename DomainWallFermionR::ImplParams params; 
 | 
			
		||||
  params.overlapCommsCompute = overlapComms;
 | 
			
		||||
  
 | 
			
		||||
  RealD NP = UGrid->_Nprocessors;
 | 
			
		||||
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall=10000;
 | 
			
		||||
  int ncall=100;
 | 
			
		||||
  {
 | 
			
		||||
    double t0=usecond();
 | 
			
		||||
    for(int i=0;i<ncall;i++){
 | 
			
		||||
@@ -97,11 +137,13 @@ int main (int argc, char ** argv)
 | 
			
		||||
    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NP<<std::endl;
 | 
			
		||||
    err = ref-result; 
 | 
			
		||||
    std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
 | 
			
		||||
    Dw.Report();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  exit(0);
 | 
			
		||||
 | 
			
		||||
  if (1)
 | 
			
		||||
  { // Naive wilson dag implementation
 | 
			
		||||
@@ -154,8 +196,8 @@ int main (int argc, char ** argv)
 | 
			
		||||
    double flops=(1344.0*volume*ncall)/2;
 | 
			
		||||
 | 
			
		||||
    std::cout<<GridLogMessage << "Deo mflop/s =   "<< flops/(t1-t0)<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage << "Deo mflop/s per node   "<< flops/(t1-t0)/NP<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Dw.DhopEO(src_o,r_e,DaggerNo);
 | 
			
		||||
  Dw.DhopOE(src_e,r_o,DaggerNo);
 | 
			
		||||
  Dw.Dhop  (src  ,result,DaggerNo);
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_memory_asynch.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_memory_bandwidth.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_su3.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/Benchmark_wilson.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
@@ -16,10 +44,15 @@ struct scal {
 | 
			
		||||
    Gamma::GammaT
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
bool overlapComms = false;
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
 | 
			
		||||
    overlapComms = true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::vector<int> latt_size   = GridDefaultLatt();
 | 
			
		||||
  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
			
		||||
@@ -57,11 +90,12 @@ int main (int argc, char ** argv)
 | 
			
		||||
  Complex cone(1.0,0.0);
 | 
			
		||||
  for(int nn=0;nn<Nd;nn++){
 | 
			
		||||
    random(pRNG,U[nn]);
 | 
			
		||||
    if(0) {
 | 
			
		||||
      if (nn==-1) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
      else       { U[nn] = cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
    if(1) {
 | 
			
		||||
      if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
      //      else       { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
      else       { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
 | 
			
		||||
    }
 | 
			
		||||
    pokeIndex<LorentzIndex>(Umu,U[nn],nn);
 | 
			
		||||
    PokeIndex<LorentzIndex>(Umu,U[nn],nn);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@@ -87,7 +121,11 @@ int main (int argc, char ** argv)
 | 
			
		||||
  }
 | 
			
		||||
  ref = -0.5*ref;
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 | 
			
		||||
 | 
			
		||||
  typename WilsonFermionR::ImplParams params; 
 | 
			
		||||
  params.overlapCommsCompute = overlapComms;
 | 
			
		||||
 | 
			
		||||
  WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
 | 
			
		||||
  
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall=1000;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										172
									
								
								benchmarks/Benchmark_zmm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								benchmarks/Benchmark_zmm.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,172 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./tests/Test_zmm.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <PerfCount.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int bench(std::ofstream &os, std::vector<int> &latt4,int Ls);
 | 
			
		||||
 | 
			
		||||
int main(int argc,char **argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
  std::ofstream os("zmm.dat");
 | 
			
		||||
 | 
			
		||||
  os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
 | 
			
		||||
  for(int L=4;L<=32;L+=4){
 | 
			
		||||
    for(int m=1;m<=2;m++){
 | 
			
		||||
      for(int Ls=8;Ls<=16;Ls+=8){
 | 
			
		||||
	std::vector<int> grid({L,L,m*L,m*L});
 | 
			
		||||
	for(int i=0;i<4;i++) { 
 | 
			
		||||
	  std::cout << grid[i]<<"x";
 | 
			
		||||
	}
 | 
			
		||||
	std::cout << Ls<<std::endl;
 | 
			
		||||
	bench(os,grid,Ls);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
			
		||||
  std::vector<int> mpi_layout  = GridDefaultMpi();
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
 | 
			
		||||
  GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds4);
 | 
			
		||||
 | 
			
		||||
  LatticeFermion src (FGrid);
 | 
			
		||||
  LatticeFermion tmp (FGrid);
 | 
			
		||||
  LatticeFermion srce(FrbGrid);
 | 
			
		||||
 | 
			
		||||
  LatticeFermion resulto(FrbGrid); resulto=zero;
 | 
			
		||||
  LatticeFermion resulta(FrbGrid); resulta=zero;
 | 
			
		||||
  LatticeFermion junk(FrbGrid); junk=zero;
 | 
			
		||||
  LatticeFermion diff(FrbGrid); 
 | 
			
		||||
  LatticeGaugeField Umu(UGrid);
 | 
			
		||||
 | 
			
		||||
  double mfc, mfa, mfo, mfl1;
 | 
			
		||||
 | 
			
		||||
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  random(RNG5,src);
 | 
			
		||||
#if 1
 | 
			
		||||
  random(RNG4,Umu);
 | 
			
		||||
#else
 | 
			
		||||
  int mmu=2;
 | 
			
		||||
  std::vector<LatticeColourMatrix> U(4,UGrid);
 | 
			
		||||
  for(int mu=0;mu<Nd;mu++){
 | 
			
		||||
    U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
 | 
			
		||||
    if ( mu!=mmu ) U[mu] = zero;
 | 
			
		||||
    if ( mu==mmu ) U[mu] = 1.0;
 | 
			
		||||
    PokeIndex<LorentzIndex>(Umu,U[mu],mu);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 pickCheckerboard(Even,srce,src);
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
 | 
			
		||||
  int ncall=50;
 | 
			
		||||
  double t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    Dw.DhopOE(srce,resulto,0);
 | 
			
		||||
  }
 | 
			
		||||
  double t1=usecond();
 | 
			
		||||
 | 
			
		||||
  double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
			
		||||
  double flops=1344*volume/2;
 | 
			
		||||
 | 
			
		||||
  mfc = flops*ncall/(t1-t0);
 | 
			
		||||
  std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s =   "<< mfc<<std::endl;
 | 
			
		||||
 | 
			
		||||
  QCD::WilsonFermion5DStatic::AsmOptDslash=1;
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    Dw.DhopOE(srce,resulta,0);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  mfa = flops*ncall/(t1-t0);
 | 
			
		||||
  std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s =   "<< mfa<<std::endl;
 | 
			
		||||
  /*
 | 
			
		||||
  int dag=DaggerNo;
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<1;i++){
 | 
			
		||||
    Dw.DhopInternalOMPbench(Dw.StencilEven,Dw.LebesgueEvenOdd,Dw.UmuOdd,srce,resulta,dag);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  mfo = flops*100/(t1-t0);
 | 
			
		||||
  std::cout<<GridLogMessage << "Called ASM-OMP Dw"<< " mflop/s =   "<< mfo<<std::endl;
 | 
			
		||||
 | 
			
		||||
  t0=usecond();
 | 
			
		||||
  for(int i=0;i<1;i++){
 | 
			
		||||
    Dw.DhopInternalL1bench(Dw.StencilEven,Dw.LebesgueEvenOdd,Dw.UmuOdd,srce,resulta,dag);
 | 
			
		||||
  }
 | 
			
		||||
  t1=usecond();
 | 
			
		||||
  mfl1= flops*100/(t1-t0);
 | 
			
		||||
  std::cout<<GridLogMessage << "Called ASM-L1 Dw"<< " mflop/s =   "<< mfl1<<std::endl;
 | 
			
		||||
  os << latt4[0]*latt4[1]*latt4[2]*latt4[3]<< " "<<Ls<<" "<< latt4[0] <<" " <<latt4[2]<< " "
 | 
			
		||||
     << mfc<<" "
 | 
			
		||||
     << mfa<<" "
 | 
			
		||||
     << mfo<<" "
 | 
			
		||||
     << mfl1<<std::endl;
 | 
			
		||||
  */
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
 | 
			
		||||
    Dw.DhopOE(srce,resulta,0);
 | 
			
		||||
    PerformanceCounter Counter(i);
 | 
			
		||||
    Counter.Start();
 | 
			
		||||
    Dw.DhopOE(srce,resulta,0);
 | 
			
		||||
    Counter.Stop();
 | 
			
		||||
    Counter.Report();
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
  //resulta = (-0.5) * resulta;
 | 
			
		||||
 | 
			
		||||
  diff = resulto-resulta;
 | 
			
		||||
  std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
 | 
			
		||||
  std::cout<<std::endl;
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
 | 
			
		||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson Benchmark_zmm
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_comms_SOURCES=Benchmark_comms.cc
 | 
			
		||||
@@ -25,3 +25,7 @@ Benchmark_su3_LDADD=-lGrid
 | 
			
		||||
Benchmark_wilson_SOURCES=Benchmark_wilson.cc
 | 
			
		||||
Benchmark_wilson_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmark_zmm_SOURCES=Benchmark_zmm.cc
 | 
			
		||||
Benchmark_zmm_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/simple_su3_expr.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./benchmarks/simple_su3_test.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										83
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										83
									
								
								configure.ac
									
									
									
									
									
								
							@@ -53,25 +53,21 @@ echo
 | 
			
		||||
echo Checking libraries 
 | 
			
		||||
echo :::::::::::::::::::::::::::::::::::::::::::
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
AC_CHECK_FUNCS([gettimeofday])
 | 
			
		||||
 | 
			
		||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
 | 
			
		||||
#        [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
 | 
			
		||||
#Please install or provide the correct path to your installation
 | 
			
		||||
#Info at: http://www.gmplib.org)])
 | 
			
		||||
 | 
			
		||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
 | 
			
		||||
#        [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
 | 
			
		||||
#Please install or provide the correct path to your installation
 | 
			
		||||
#Info at: http://www.mpfr.org/)])
 | 
			
		||||
#
 | 
			
		||||
# SIMD instructions selection
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
 | 
			
		||||
	[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
 | 
			
		||||
	[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
 | 
			
		||||
	[ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG])
 | 
			
		||||
 | 
			
		||||
supported=no
 | 
			
		||||
 | 
			
		||||
ac_ZMM=no;
 | 
			
		||||
 | 
			
		||||
case ${ac_SIMD} in
 | 
			
		||||
     SSE4)
 | 
			
		||||
       echo Configuring for SSE4
 | 
			
		||||
@@ -113,11 +109,13 @@ case ${ac_SIMD} in
 | 
			
		||||
       echo Configuring for AVX512 
 | 
			
		||||
       AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
       ac_ZMM=yes;
 | 
			
		||||
     ;;
 | 
			
		||||
     IMCI)
 | 
			
		||||
       echo Configuring for IMCI
 | 
			
		||||
       AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
 | 
			
		||||
       supported="cross compilation"
 | 
			
		||||
       ac_ZMM=no;
 | 
			
		||||
     ;;
 | 
			
		||||
     NEONv8)
 | 
			
		||||
       echo Configuring for experimental ARMv8a support 
 | 
			
		||||
@@ -133,6 +131,17 @@ case ${ac_SIMD} in
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
case ${ac_ZMM} in
 | 
			
		||||
yes)
 | 
			
		||||
	echo Enabling ZMM source code
 | 
			
		||||
;;
 | 
			
		||||
no)
 | 
			
		||||
	echo Disabling ZMM source code
 | 
			
		||||
;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
AM_CONDITIONAL(BUILD_ZMM,[ test "X${ac_ZMM}X" == "XyesX" ])
 | 
			
		||||
 | 
			
		||||
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
 | 
			
		||||
case ${ac_PRECISION} in
 | 
			
		||||
     single)
 | 
			
		||||
@@ -145,6 +154,10 @@ case ${ac_PRECISION} in
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Comms selection
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
 | 
			
		||||
 | 
			
		||||
case ${ac_COMMS} in
 | 
			
		||||
@@ -156,14 +169,39 @@ case ${ac_COMMS} in
 | 
			
		||||
       echo Configuring for MPI communications
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
 | 
			
		||||
     ;;
 | 
			
		||||
     shmem)
 | 
			
		||||
       echo Configuring for SHMEM communications
 | 
			
		||||
       AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
 | 
			
		||||
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# RNG selection
 | 
			
		||||
#
 | 
			
		||||
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
 | 
			
		||||
	[Select Random Number Generator to be used])],\
 | 
			
		||||
	[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
 | 
			
		||||
case ${ac_RNG} in
 | 
			
		||||
     ranlux48)
 | 
			
		||||
     AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
 | 
			
		||||
     ;;
 | 
			
		||||
     mt19937)
 | 
			
		||||
     AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
     AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); 
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
#
 | 
			
		||||
# Chroma regression tests
 | 
			
		||||
#
 | 
			
		||||
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
 | 
			
		||||
 | 
			
		||||
case ${ac_CHROMA} in
 | 
			
		||||
@@ -180,6 +218,26 @@ esac
 | 
			
		||||
 | 
			
		||||
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Lapack
 | 
			
		||||
#
 | 
			
		||||
AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Enable lapack yes/no ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
 | 
			
		||||
 | 
			
		||||
case ${ac_LAPACK} in
 | 
			
		||||
     yes)
 | 
			
		||||
       echo Enabling lapack
 | 
			
		||||
     ;;
 | 
			
		||||
     no)
 | 
			
		||||
       echo Disabling lapack
 | 
			
		||||
     ;;
 | 
			
		||||
     *)
 | 
			
		||||
       echo Enabling lapack at ${ac_LAPACK}
 | 
			
		||||
     ;;
 | 
			
		||||
esac
 | 
			
		||||
 | 
			
		||||
AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ])
 | 
			
		||||
AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ])
 | 
			
		||||
 | 
			
		||||
###################################################################
 | 
			
		||||
# Checks for doxygen support
 | 
			
		||||
# if present enables the "make doxyfile" command
 | 
			
		||||
@@ -219,8 +277,11 @@ The following features are enabled:
 | 
			
		||||
- graphs and diagrams           : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
 | 
			
		||||
- Supported SIMD flags          : $SIMD_FLAGS
 | 
			
		||||
----------------------------------------------------------
 | 
			
		||||
- enabled simd support          : ${ac_SIMD}   (supported: $supported )
 | 
			
		||||
- enabled simd support          : ${ac_SIMD}   (config macro says supported: $supported )
 | 
			
		||||
- communications type           : ${ac_COMMS}
 | 
			
		||||
- default precision             : ${ac_PRECISION}
 | 
			
		||||
- RNG choice                    : ${ac_RNG} 
 | 
			
		||||
- LAPACK	                : ${ac_LAPACK} 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
"
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./gcc-bug-report/broken.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <complex>
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Algorithms.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_ALGORITHMS_H
 | 
			
		||||
#define GRID_ALGORITHMS_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/AlignedAllocator.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_ALIGNED_ALLOCATOR_H
 | 
			
		||||
#define GRID_ALIGNED_ALLOCATOR_H
 | 
			
		||||
 | 
			
		||||
@@ -8,11 +36,18 @@
 | 
			
		||||
#include <malloc.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
#ifdef HAVE_MM_MALLOC_H
 | 
			
		||||
#include <mm_malloc.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
extern "C" { 
 | 
			
		||||
#include <mpp/shmem.h>
 | 
			
		||||
extern void * shmem_align(size_t, size_t);
 | 
			
		||||
extern void  shmem_free(void *);
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -44,21 +79,59 @@ public:
 | 
			
		||||
 | 
			
		||||
  size_type  max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
 | 
			
		||||
 | 
			
		||||
  pointer allocate(size_type __n, const void* = 0)
 | 
			
		||||
  pointer allocate(size_type __n, const void* _p= 0)
 | 
			
		||||
  { 
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
 | 
			
		||||
    _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define PARANOID_SYMMETRIC_HEAP
 | 
			
		||||
#ifdef PARANOID_SYMMETRIC_HEAP
 | 
			
		||||
    static void * bcast;
 | 
			
		||||
    static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
    bcast = (void *) ptr;
 | 
			
		||||
    shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);
 | 
			
		||||
 | 
			
		||||
    if ( bcast != ptr ) {
 | 
			
		||||
      std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
 | 
			
		||||
      BACKTRACEFILE();
 | 
			
		||||
      exit(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    assert( bcast == (void *) ptr);
 | 
			
		||||
 | 
			
		||||
#endif 
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_MM_MALLOC_H
 | 
			
		||||
    _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
 | 
			
		||||
#else
 | 
			
		||||
    _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
    _Tp tmp;
 | 
			
		||||
#undef FIRST_TOUCH_OPTIMISE
 | 
			
		||||
#ifdef FIRST_TOUCH_OPTIMISE
 | 
			
		||||
#pragma omp parallel for 
 | 
			
		||||
  for(int i=0;i<__n;i++){
 | 
			
		||||
    ptr[i]=tmp;
 | 
			
		||||
  }
 | 
			
		||||
#endif 
 | 
			
		||||
    return ptr;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void deallocate(pointer __p, size_type) { 
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
    shmem_free((void *)__p);
 | 
			
		||||
#else
 | 
			
		||||
#ifdef HAVE_MM_MALLOC_H
 | 
			
		||||
    _mm_free((void *)__p); 
 | 
			
		||||
#else
 | 
			
		||||
    free((void *)__p);
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
  void construct(pointer __p, const _Tp& __val) { };
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Cartesian.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_H
 | 
			
		||||
#define GRID_CARTESIAN_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Communicator.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_COMMUNICATOR_H
 | 
			
		||||
#define GRID_COMMUNICATOR_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										174
									
								
								lib/Config.h.in
									
									
									
									
									
								
							
							
						
						
									
										174
									
								
								lib/Config.h.in
									
									
									
									
									
								
							@@ -1,174 +0,0 @@
 | 
			
		||||
/* lib/Config.h.in.  Generated from configure.ac by autoheader.  */
 | 
			
		||||
 | 
			
		||||
/* AVX Intrinsics */
 | 
			
		||||
#undef AVX1
 | 
			
		||||
 | 
			
		||||
/* AVX2 Intrinsics */
 | 
			
		||||
#undef AVX2
 | 
			
		||||
 | 
			
		||||
/* AVX512 Intrinsics for Knights Landing */
 | 
			
		||||
#undef AVX512
 | 
			
		||||
 | 
			
		||||
/* AVX Intrinsics with FMA4 */
 | 
			
		||||
#undef AVXFMA4
 | 
			
		||||
 | 
			
		||||
/* EMPTY_SIMD only for DEBUGGING */
 | 
			
		||||
#undef EMPTY_SIMD
 | 
			
		||||
 | 
			
		||||
/* GRID_COMMS_MPI */
 | 
			
		||||
#undef GRID_COMMS_MPI
 | 
			
		||||
 | 
			
		||||
/* GRID_COMMS_NONE */
 | 
			
		||||
#undef GRID_COMMS_NONE
 | 
			
		||||
 | 
			
		||||
/* GRID_DEFAULT_PRECISION is DOUBLE */
 | 
			
		||||
#undef GRID_DEFAULT_PRECISION_DOUBLE
 | 
			
		||||
 | 
			
		||||
/* GRID_DEFAULT_PRECISION is SINGLE */
 | 
			
		||||
#undef GRID_DEFAULT_PRECISION_SINGLE
 | 
			
		||||
 | 
			
		||||
/* Support Altivec instructions */
 | 
			
		||||
#undef HAVE_ALTIVEC
 | 
			
		||||
 | 
			
		||||
/* Support AVX (Advanced Vector Extensions) instructions */
 | 
			
		||||
#undef HAVE_AVX
 | 
			
		||||
 | 
			
		||||
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
 | 
			
		||||
#undef HAVE_AVX2
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
 | 
			
		||||
   don't. */
 | 
			
		||||
#undef HAVE_DECL_BE64TOH
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't.
 | 
			
		||||
   */
 | 
			
		||||
#undef HAVE_DECL_NTOHLL
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <endian.h> header file. */
 | 
			
		||||
#undef HAVE_ENDIAN_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <execinfo.h> header file. */
 | 
			
		||||
#undef HAVE_EXECINFO_H
 | 
			
		||||
 | 
			
		||||
/* Support FMA3 (Fused Multiply-Add) instructions */
 | 
			
		||||
#undef HAVE_FMA
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `gettimeofday' function. */
 | 
			
		||||
#undef HAVE_GETTIMEOFDAY
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <gmp.h> header file. */
 | 
			
		||||
#undef HAVE_GMP_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <inttypes.h> header file. */
 | 
			
		||||
#undef HAVE_INTTYPES_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <malloc.h> header file. */
 | 
			
		||||
#undef HAVE_MALLOC_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <malloc/malloc.h> header file. */
 | 
			
		||||
#undef HAVE_MALLOC_MALLOC_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <memory.h> header file. */
 | 
			
		||||
#undef HAVE_MEMORY_H
 | 
			
		||||
 | 
			
		||||
/* Support mmx instructions */
 | 
			
		||||
#undef HAVE_MMX
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
 | 
			
		||||
#undef HAVE_MM_MALLOC_H
 | 
			
		||||
 | 
			
		||||
/* Support SSE (Streaming SIMD Extensions) instructions */
 | 
			
		||||
#undef HAVE_SSE
 | 
			
		||||
 | 
			
		||||
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
 | 
			
		||||
#undef HAVE_SSE2
 | 
			
		||||
 | 
			
		||||
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
 | 
			
		||||
#undef HAVE_SSE3
 | 
			
		||||
 | 
			
		||||
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
 | 
			
		||||
#undef HAVE_SSE4_1
 | 
			
		||||
 | 
			
		||||
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
 | 
			
		||||
#undef HAVE_SSE4_2
 | 
			
		||||
 | 
			
		||||
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
 | 
			
		||||
#undef HAVE_SSSE3
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdint.h> header file. */
 | 
			
		||||
#undef HAVE_STDINT_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdlib.h> header file. */
 | 
			
		||||
#undef HAVE_STDLIB_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <strings.h> header file. */
 | 
			
		||||
#undef HAVE_STRINGS_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <string.h> header file. */
 | 
			
		||||
#undef HAVE_STRING_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/stat.h> header file. */
 | 
			
		||||
#undef HAVE_SYS_STAT_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/types.h> header file. */
 | 
			
		||||
#undef HAVE_SYS_TYPES_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <unistd.h> header file. */
 | 
			
		||||
#undef HAVE_UNISTD_H
 | 
			
		||||
 | 
			
		||||
/* IMCI Intrinsics for Knights Corner */
 | 
			
		||||
#undef IMCI
 | 
			
		||||
 | 
			
		||||
/* NEON ARMv8 Experimental support */
 | 
			
		||||
#undef NEONv8
 | 
			
		||||
 | 
			
		||||
/* Name of package */
 | 
			
		||||
#undef PACKAGE
 | 
			
		||||
 | 
			
		||||
/* Define to the address where bug reports for this package should be sent. */
 | 
			
		||||
#undef PACKAGE_BUGREPORT
 | 
			
		||||
 | 
			
		||||
/* Define to the full name of this package. */
 | 
			
		||||
#undef PACKAGE_NAME
 | 
			
		||||
 | 
			
		||||
/* Define to the full name and version of this package. */
 | 
			
		||||
#undef PACKAGE_STRING
 | 
			
		||||
 | 
			
		||||
/* Define to the one symbol short name of this package. */
 | 
			
		||||
#undef PACKAGE_TARNAME
 | 
			
		||||
 | 
			
		||||
/* Define to the home page for this package. */
 | 
			
		||||
#undef PACKAGE_URL
 | 
			
		||||
 | 
			
		||||
/* Define to the version of this package. */
 | 
			
		||||
#undef PACKAGE_VERSION
 | 
			
		||||
 | 
			
		||||
/* SSE4 Intrinsics */
 | 
			
		||||
#undef SSE4
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the ANSI C header files. */
 | 
			
		||||
#undef STDC_HEADERS
 | 
			
		||||
 | 
			
		||||
/* Version number of package */
 | 
			
		||||
#undef VERSION
 | 
			
		||||
 | 
			
		||||
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
 | 
			
		||||
   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
 | 
			
		||||
   #define below would cause a syntax error. */
 | 
			
		||||
#undef _UINT32_T
 | 
			
		||||
 | 
			
		||||
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
 | 
			
		||||
   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
 | 
			
		||||
   #define below would cause a syntax error. */
 | 
			
		||||
#undef _UINT64_T
 | 
			
		||||
 | 
			
		||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
 | 
			
		||||
#undef size_t
 | 
			
		||||
 | 
			
		||||
/* Define to the type of an unsigned integer type of width exactly 32 bits if
 | 
			
		||||
   such a type exists and the standard includes do not define it. */
 | 
			
		||||
#undef uint32_t
 | 
			
		||||
 | 
			
		||||
/* Define to the type of an unsigned integer type of width exactly 64 bits if
 | 
			
		||||
   such a type exists and the standard includes do not define it. */
 | 
			
		||||
#undef uint64_t
 | 
			
		||||
							
								
								
									
										31
									
								
								lib/Cshift.h
									
									
									
									
									
								
							
							
						
						
									
										31
									
								
								lib/Cshift.h
									
									
									
									
									
								
							@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Cshift.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_CSHIFT_H_
 | 
			
		||||
#define _GRID_CSHIFT_H_
 | 
			
		||||
 | 
			
		||||
@@ -10,4 +37,8 @@
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
#include <cshift/Cshift_mpi.h>
 | 
			
		||||
#endif 
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
#include <cshift/Cshift_mpi.h> // uses same implementation of communicator
 | 
			
		||||
#endif 
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										38
									
								
								lib/Grid.h
									
									
									
									
									
								
							
							
						
						
									
										38
									
								
								lib/Grid.h
									
									
									
									
									
								
							@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Grid.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
//
 | 
			
		||||
//  Grid.h
 | 
			
		||||
//  simd
 | 
			
		||||
@@ -33,10 +62,12 @@
 | 
			
		||||
#include <serialisation/Serialisation.h>
 | 
			
		||||
#include <Config.h>
 | 
			
		||||
#include <Timer.h>
 | 
			
		||||
#include <PerfCount.h>
 | 
			
		||||
#include <Log.h>
 | 
			
		||||
#include <AlignedAllocator.h>
 | 
			
		||||
#include <Simd.h>
 | 
			
		||||
#include <Threads.h>
 | 
			
		||||
#include <Lexicographic.h>
 | 
			
		||||
#include <Communicator.h> 
 | 
			
		||||
#include <Cartesian.h>    
 | 
			
		||||
#include <Tensors.h>      
 | 
			
		||||
@@ -44,11 +75,14 @@
 | 
			
		||||
#include <Cshift.h>       
 | 
			
		||||
#include <Stencil.h>      
 | 
			
		||||
#include <Algorithms.h>   
 | 
			
		||||
#include <qcd/QCD.h>
 | 
			
		||||
#include <parallelIO/BinaryIO.h>
 | 
			
		||||
#include <qcd/QCD.h>
 | 
			
		||||
#include <parallelIO/NerscIO.h>
 | 
			
		||||
 | 
			
		||||
#include <Init.h>
 | 
			
		||||
 | 
			
		||||
#include <qcd/hmc/NerscCheckpointer.h>
 | 
			
		||||
#include <qcd/hmc/HmcRunner.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										105
									
								
								lib/Init.cc
									
									
									
									
									
								
							
							
						
						
									
										105
									
								
								lib/Init.cc
									
									
									
									
									
								
							@@ -1,3 +1,33 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Init.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
/****************************************************************************/
 | 
			
		||||
/* pab: Signal magic. Processor state dump is x86-64 specific               */
 | 
			
		||||
/****************************************************************************/
 | 
			
		||||
@@ -15,12 +45,6 @@
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <iterator>
 | 
			
		||||
 | 
			
		||||
#define __X86_64
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_EXECINFO_H
 | 
			
		||||
#include <execinfo.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
@@ -120,6 +144,10 @@ void GridParseLayout(char **argv,int argc,
 | 
			
		||||
  }
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
 | 
			
		||||
    std::vector<int> ompthreads(0);
 | 
			
		||||
#ifndef GRID_OMP
 | 
			
		||||
    std::cout << GridLogWarning << "'--threads' option used but Grid was"
 | 
			
		||||
              << " not compiled with thread support" << std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
    arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
 | 
			
		||||
    GridCmdOptionIntVector(arg,ompthreads);
 | 
			
		||||
    assert(ompthreads.size()==1);
 | 
			
		||||
@@ -144,9 +172,8 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
 | 
			
		||||
/////////////////////////////////////////////////////////
 | 
			
		||||
void Grid_init(int *argc,char ***argv)
 | 
			
		||||
{
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
  MPI_Init(argc,argv);
 | 
			
		||||
#endif
 | 
			
		||||
  CartesianCommunicator::Init(argc,argv);
 | 
			
		||||
 | 
			
		||||
  // Parse command line args.
 | 
			
		||||
 | 
			
		||||
  GridLogger::StopWatch.Start();
 | 
			
		||||
@@ -164,9 +191,10 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    std::cout<<GridLogMessage<<"--debug-stdout  : print stdout from EVERY node"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--mpi n.n.n.n   : default MPI decomposition"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--omp n         : default number of OMP threads"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--threads n     : default number of OMP threads"<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<<"--grid n.n.n.n  : default Grid size"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Debug"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug"<<std::endl;
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
 | 
			
		||||
@@ -207,7 +235,37 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::cout <<std::endl;
 | 
			
		||||
  std::cout <<Logger::RED  << "__|__|__|__|__"<<             "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
			
		||||
  std::cout <<Logger::RED  << "__|__|__|__|__"<<             "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
			
		||||
  std::cout <<Logger::RED  << "__|__|  |  |  "<<             "|  |  | "<<Logger::PURPLE<<" |  |  |"<<                "  |  |  | _|__"<<std::endl; 
 | 
			
		||||
  std::cout <<Logger::RED  << "__|__         "<<             "        "<<Logger::PURPLE<<"        "<<                "          _|__"<<std::endl; 
 | 
			
		||||
  std::cout <<Logger::RED  << "__|_  "<<Logger::GREEN<<" GGGG   "<<Logger::RED<<" RRRR   "<<Logger::BLUE  <<" III    "<<Logger::PURPLE<<"DDDD  "<<Logger::PURPLE<<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<Logger::RED  << "__|_  "<<Logger::GREEN<<"G       "<<Logger::RED<<" R   R  "<<Logger::BLUE  <<"  I     "<<Logger::PURPLE<<"D   D "<<Logger::PURPLE<<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<Logger::RED  << "__|_  "<<Logger::GREEN<<"G       "<<Logger::RED<<" R   R  "<<Logger::BLUE  <<"  I     "<<Logger::PURPLE<<"D    D"<<Logger::PURPLE<<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<Logger::BLUE << "__|_  "<<Logger::GREEN<<"G  GG   "<<Logger::RED<<" RRRR   "<<Logger::BLUE  <<"  I     "<<Logger::PURPLE<<"D    D"<<Logger::GREEN <<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<Logger::BLUE << "__|_  "<<Logger::GREEN<<"G   G   "<<Logger::RED<<" R  R   "<<Logger::BLUE  <<"  I     "<<Logger::PURPLE<<"D   D "<<Logger::GREEN <<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<Logger::BLUE << "__|_  "<<Logger::GREEN<<" GGGG   "<<Logger::RED<<" R   R  "<<Logger::BLUE  <<" III    "<<Logger::PURPLE<<"DDDD  "<<Logger::GREEN <<"    _|__"<<std::endl;
 | 
			
		||||
  std::cout <<Logger::BLUE << "__|__         "<<             "        "<<Logger::GREEN <<"        "<<                "          _|__"<<std::endl; 
 | 
			
		||||
  std::cout <<Logger::BLUE << "__|__|__|__|__"<<             "|__|__|_"<<Logger::GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
			
		||||
  std::cout <<Logger::BLUE << "__|__|__|__|__"<<             "|__|__|_"<<Logger::GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
 | 
			
		||||
  std::cout <<Logger::BLUE << "  |  |  |  |  "<<             "|  |  | "<<Logger::GREEN <<" |  |  |"<<                "  |  |  |  |  "<<std::endl; 
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
  std::cout <<Logger::YELLOW<< std::endl;
 | 
			
		||||
  std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
 | 
			
		||||
  std::cout << "Colours by Tadahito Boyle "<<std::endl;
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
  std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
 | 
			
		||||
  std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
 | 
			
		||||
  std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
 | 
			
		||||
  std::cout << "(at your option) any later version."<<std::endl;
 | 
			
		||||
  std::cout << std::endl;
 | 
			
		||||
  std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
 | 
			
		||||
  std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
 | 
			
		||||
  std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<std::endl;
 | 
			
		||||
  std::cout << "GNU General Public License for more details."<<std::endl;
 | 
			
		||||
  std::cout << Logger::BLACK <<std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
@@ -224,7 +282,6 @@ double usecond(void) {
 | 
			
		||||
  return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define _NBACKTRACE (256)
 | 
			
		||||
void * Grid_backtrace_buffer[_NBACKTRACE];
 | 
			
		||||
 | 
			
		||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 | 
			
		||||
@@ -234,13 +291,13 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 | 
			
		||||
  printf("         code %d\n",si->si_code);
 | 
			
		||||
 | 
			
		||||
  // Linux/Posix
 | 
			
		||||
#ifdef __linux__ 
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
  // And x86 64bit
 | 
			
		||||
    ucontext_t * uc= (ucontext_t *)ptr;
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  ucontext_t * uc= (ucontext_t *)ptr;
 | 
			
		||||
  struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
 | 
			
		||||
  printf("  instruction %llx\n",(unsigned long long)sc->rip);
 | 
			
		||||
#define REG(A)  printf("  %s %lx\n",#A,sc-> A);
 | 
			
		||||
 | 
			
		||||
  REG(rdi);
 | 
			
		||||
  REG(rsi);
 | 
			
		||||
  REG(rbp);
 | 
			
		||||
@@ -261,17 +318,15 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 | 
			
		||||
  REG(r14);
 | 
			
		||||
  REG(r15);
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef HAVE_EXECINFO_H
 | 
			
		||||
  int symbols    = backtrace        (Grid_backtrace_buffer,_NBACKTRACE);
 | 
			
		||||
  char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);
 | 
			
		||||
  for (int i = 0; i < symbols; i++){
 | 
			
		||||
    printf ("%s\n", strings[i]);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
  BACKTRACE();
 | 
			
		||||
  exit(0);
 | 
			
		||||
  return;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_FPE
 | 
			
		||||
#define _GNU_SOURCE
 | 
			
		||||
#include <fenv.h>
 | 
			
		||||
#endif
 | 
			
		||||
void Grid_debug_handler_init(void)
 | 
			
		||||
{
 | 
			
		||||
  struct sigaction sa,osa;
 | 
			
		||||
@@ -280,5 +335,9 @@ void Grid_debug_handler_init(void)
 | 
			
		||||
  sa.sa_flags    = SA_SIGINFO;
 | 
			
		||||
  sigaction(SIGSEGV,&sa,NULL);
 | 
			
		||||
  sigaction(SIGTRAP,&sa,NULL);
 | 
			
		||||
#ifdef GRID_FPE
 | 
			
		||||
  feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
 | 
			
		||||
  sigaction(SIGFPE,&sa,NULL);
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										30
									
								
								lib/Init.h
									
									
									
									
									
								
							
							
						
						
									
										30
									
								
								lib/Init.h
									
									
									
									
									
								
							@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Init.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_INIT_H
 | 
			
		||||
#define GRID_INIT_H
 | 
			
		||||
 | 
			
		||||
@@ -21,6 +49,8 @@ namespace Grid {
 | 
			
		||||
  std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
 | 
			
		||||
  bool        GridCmdOptionExists(char** begin, char** end, const std::string& option);
 | 
			
		||||
  std::string GridCmdVectorIntToString(const std::vector<int> & vec);
 | 
			
		||||
  void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
 | 
			
		||||
  void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec);
 | 
			
		||||
 | 
			
		||||
  void GridParseLayout(char **argv,int argc,
 | 
			
		||||
		       std::vector<int> &latt,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Lattice.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_H
 | 
			
		||||
#define GRID_LATTICE_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										32
									
								
								lib/Lexicographic.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								lib/Lexicographic.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,32 @@
 | 
			
		||||
#ifndef GRID_LEXICOGRAPHIC_H
 | 
			
		||||
#define GRID_LEXICOGRAPHIC_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid{
 | 
			
		||||
 | 
			
		||||
  class Lexicographic {
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
    static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
 | 
			
		||||
      int nd= dims.size();
 | 
			
		||||
      coor.resize(nd);
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	coor[d] = index % dims[d];
 | 
			
		||||
	index   = index / dims[d];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
 | 
			
		||||
      int nd=dims.size();
 | 
			
		||||
      int stride=1;
 | 
			
		||||
      index=0;
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	index = index+stride*coor[d];
 | 
			
		||||
	stride=stride*dims[d];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										90
									
								
								lib/Log.cc
									
									
									
									
									
								
							
							
						
						
									
										90
									
								
								lib/Log.cc
									
									
									
									
									
								
							@@ -1,16 +1,67 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Log.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
GridStopWatch Logger::StopWatch;
 | 
			
		||||
std::ostream  Logger::devnull(0);
 | 
			
		||||
std::string Logger::BLACK("\033[30m");
 | 
			
		||||
std::string Logger::RED("\033[31m");
 | 
			
		||||
std::string Logger::GREEN("\033[32m");
 | 
			
		||||
std::string Logger::YELLOW("\033[33m");
 | 
			
		||||
std::string Logger::BLUE("\033[34m");
 | 
			
		||||
std::string Logger::PURPLE("\033[35m");
 | 
			
		||||
std::string Logger::CYAN("\033[36m");
 | 
			
		||||
std::string Logger::WHITE("\033[37m");
 | 
			
		||||
std::string Logger::NORMAL("\033[0;39m");
 | 
			
		||||
std::string EMPTY("");
 | 
			
		||||
 | 
			
		||||
GridLogger GridLogError      (1,"Error");
 | 
			
		||||
GridLogger GridLogWarning    (1,"Warning");
 | 
			
		||||
GridLogger GridLogMessage    (1,"Message");
 | 
			
		||||
GridLogger GridLogDebug      (1,"Debug");
 | 
			
		||||
GridLogger GridLogPerformance(1,"Performance");
 | 
			
		||||
GridLogger GridLogIterative  (1,"Iterative");
 | 
			
		||||
#if 0  
 | 
			
		||||
  GridLogger GridLogError      (1,"Error",Logger::RED);
 | 
			
		||||
  GridLogger GridLogWarning    (1,"Warning",Logger::YELLOW);
 | 
			
		||||
  GridLogger GridLogMessage    (1,"Message",Logger::BLACK);
 | 
			
		||||
  GridLogger GridLogDebug      (1,"Debug",Logger::PURPLE);
 | 
			
		||||
  GridLogger GridLogPerformance(1,"Performance",Logger::GREEN);
 | 
			
		||||
  GridLogger GridLogIterative  (1,"Iterative",Logger::BLUE);
 | 
			
		||||
  GridLogger GridLogIntegrator (1,"Integrator",Logger::BLUE);
 | 
			
		||||
#else
 | 
			
		||||
  GridLogger GridLogError      (1,"Error",EMPTY);
 | 
			
		||||
  GridLogger GridLogWarning    (1,"Warning",EMPTY);
 | 
			
		||||
  GridLogger GridLogMessage    (1,"Message",EMPTY);
 | 
			
		||||
  GridLogger GridLogDebug      (1,"Debug",EMPTY);
 | 
			
		||||
  GridLogger GridLogPerformance(1,"Performance",EMPTY);
 | 
			
		||||
  GridLogger GridLogIterative  (1,"Iterative",EMPTY);
 | 
			
		||||
  GridLogger GridLogIntegrator (1,"Integrator",EMPTY);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
void GridLogConfigure(std::vector<std::string> &logstreams)
 | 
			
		||||
{
 | 
			
		||||
@@ -20,6 +71,20 @@ void GridLogConfigure(std::vector<std::string> &logstreams)
 | 
			
		||||
  GridLogIterative.Active(0);
 | 
			
		||||
  GridLogDebug.Active(0);
 | 
			
		||||
  GridLogPerformance.Active(0);
 | 
			
		||||
  GridLogIntegrator.Active(0);
 | 
			
		||||
 | 
			
		||||
  int blackAndWhite = 1;
 | 
			
		||||
  if(blackAndWhite){
 | 
			
		||||
    Logger::BLACK = std::string("");
 | 
			
		||||
    Logger::RED    =Logger::BLACK;
 | 
			
		||||
    Logger::GREEN  =Logger::BLACK;
 | 
			
		||||
    Logger::YELLOW =Logger::BLACK;
 | 
			
		||||
    Logger::BLUE   =Logger::BLACK;
 | 
			
		||||
    Logger::PURPLE =Logger::BLACK;
 | 
			
		||||
    Logger::CYAN   =Logger::BLACK;
 | 
			
		||||
    Logger::WHITE  =Logger::BLACK;
 | 
			
		||||
    Logger::NORMAL =Logger::BLACK;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<logstreams.size();i++){
 | 
			
		||||
    if ( logstreams[i]== std::string("Error")       ) GridLogError.Active(1);
 | 
			
		||||
@@ -28,6 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams)
 | 
			
		||||
    if ( logstreams[i]== std::string("Iterative")   ) GridLogIterative.Active(1);
 | 
			
		||||
    if ( logstreams[i]== std::string("Debug")       ) GridLogDebug.Active(1);
 | 
			
		||||
    if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1);
 | 
			
		||||
    if ( logstreams[i]== std::string("Integrator" ) ) GridLogIntegrator.Active(1);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -36,13 +102,16 @@ void GridLogConfigure(std::vector<std::string> &logstreams)
 | 
			
		||||
////////////////////////////////////////////////////////////
 | 
			
		||||
void Grid_quiesce_nodes(void)
 | 
			
		||||
{
 | 
			
		||||
  int me=0;
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
  int me;
 | 
			
		||||
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
  me = shmem_my_pe();
 | 
			
		||||
#endif
 | 
			
		||||
  if ( me ) { 
 | 
			
		||||
    std::cout.setstate(std::ios::badbit);
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Grid_unquiesce_nodes(void)
 | 
			
		||||
@@ -52,11 +121,6 @@ void Grid_unquiesce_nodes(void)
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<< (std::ostream& stream, const GridTime& time)
 | 
			
		||||
{
 | 
			
		||||
  stream << time.count()<<" ms";
 | 
			
		||||
  return stream;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										94
									
								
								lib/Log.h
									
									
									
									
									
								
							
							
						
						
									
										94
									
								
								lib/Log.h
									
									
									
									
									
								
							@@ -1,21 +1,65 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Log.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Antonin Portelli <antonin.portelli@me.com>
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LOG_H
 | 
			
		||||
#define GRID_LOG_H
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_EXECINFO_H
 | 
			
		||||
#include <execinfo.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
 | 
			
		||||
int Rank(void); // used for early stage debug before library init
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<< (std::ostream& stream, const GridTime& time);
 | 
			
		||||
 | 
			
		||||
class Logger {
 | 
			
		||||
protected:
 | 
			
		||||
    int active;
 | 
			
		||||
    std::string name, topName;
 | 
			
		||||
    std::string name, topName, COLOUR;
 | 
			
		||||
public:
 | 
			
		||||
    static GridStopWatch StopWatch;
 | 
			
		||||
    static std::ostream devnull;
 | 
			
		||||
 | 
			
		||||
    static std::string BLACK;
 | 
			
		||||
    static std::string RED  ;
 | 
			
		||||
    static std::string GREEN;
 | 
			
		||||
    static std::string YELLOW;
 | 
			
		||||
    static std::string BLUE  ;
 | 
			
		||||
    static std::string PURPLE;
 | 
			
		||||
    static std::string CYAN  ;
 | 
			
		||||
    static std::string WHITE ;
 | 
			
		||||
    static std::string NORMAL;
 | 
			
		||||
    
 | 
			
		||||
    Logger(std::string topNm, int on, std::string nm)
 | 
			
		||||
    : active(on), name(nm), topName(topNm) {};
 | 
			
		||||
 Logger(std::string topNm, int on, std::string nm,std::string col)
 | 
			
		||||
   : active(on), name(nm), topName(topNm), COLOUR(col) {};
 | 
			
		||||
    
 | 
			
		||||
    void Active(int on) {active = on;};
 | 
			
		||||
    int  isActive(void) {return active;};
 | 
			
		||||
@@ -25,9 +69,10 @@ public:
 | 
			
		||||
            StopWatch.Stop();
 | 
			
		||||
            GridTime now = StopWatch.Elapsed();
 | 
			
		||||
            StopWatch.Start();
 | 
			
		||||
            stream << std::setw(8) << std::left << log.topName << " : ";
 | 
			
		||||
            stream << std::setw(12) << std::left << log.name << " : ";
 | 
			
		||||
            stream << now << " : ";
 | 
			
		||||
            stream << BLACK <<std::setw(8) << std::left << log.topName << BLACK<< " : ";
 | 
			
		||||
            stream << log.COLOUR <<std::setw(11)  << log.name << BLACK << " : ";
 | 
			
		||||
            stream << YELLOW <<std::setw(6) << now <<BLACK << " : " ;
 | 
			
		||||
            stream << log.COLOUR;
 | 
			
		||||
            return stream;
 | 
			
		||||
        } else { 
 | 
			
		||||
            return devnull;
 | 
			
		||||
@@ -38,7 +83,7 @@ public:
 | 
			
		||||
    
 | 
			
		||||
class GridLogger: public Logger {
 | 
			
		||||
public:
 | 
			
		||||
  GridLogger(int on, std::string nm): Logger("Grid", on, nm){};
 | 
			
		||||
 GridLogger(int on, std::string nm, std::string col = Logger::BLACK): Logger("Grid", on, nm, col){};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void GridLogConfigure(std::vector<std::string> &logstreams);
 | 
			
		||||
@@ -49,6 +94,39 @@ extern GridLogger GridLogMessage;
 | 
			
		||||
extern GridLogger GridLogDebug  ;
 | 
			
		||||
extern GridLogger GridLogPerformance;
 | 
			
		||||
extern GridLogger GridLogIterative  ;
 | 
			
		||||
extern GridLogger GridLogIntegrator  ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define _NBACKTRACE (256)
 | 
			
		||||
extern void * Grid_backtrace_buffer[_NBACKTRACE];
 | 
			
		||||
 | 
			
		||||
#define BACKTRACEFILE() {\
 | 
			
		||||
    char string[20];					\
 | 
			
		||||
    std::sprintf(string,"backtrace.%d",Rank());				\
 | 
			
		||||
    std::FILE * fp = std::fopen(string,"w");				\
 | 
			
		||||
    BACKTRACEFP(fp)\
 | 
			
		||||
    std::fclose(fp);	    \
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_EXECINFO_H
 | 
			
		||||
#define BACKTRACEFP(fp) { \
 | 
			
		||||
  int symbols    = backtrace        (Grid_backtrace_buffer,_NBACKTRACE);\
 | 
			
		||||
  char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
 | 
			
		||||
  for (int i = 0; i < symbols; i++){\
 | 
			
		||||
    std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \
 | 
			
		||||
  }\
 | 
			
		||||
}
 | 
			
		||||
#else 
 | 
			
		||||
#define BACKTRACEFP(fp) { \
 | 
			
		||||
    std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \
 | 
			
		||||
    std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \
 | 
			
		||||
    std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \
 | 
			
		||||
    std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define BACKTRACE() BACKTRACEFP(stdout) 
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@@ -6,6 +6,10 @@ if BUILD_COMMS_MPI
 | 
			
		||||
  extra_sources+=communicator/Communicator_mpi.cc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
if BUILD_COMMS_SHMEM
 | 
			
		||||
  extra_sources+=communicator/Communicator_shmem.cc
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
if BUILD_COMMS_NONE
 | 
			
		||||
  extra_sources+=communicator/Communicator_none.cc
 | 
			
		||||
endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								lib/Old/Endeavour.tgz
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								lib/Old/Endeavour.tgz
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Old/Tensor_peek.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATH_PEEK_H
 | 
			
		||||
#define GRID_MATH_PEEK_H
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Old/Tensor_poke.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATH_POKE_H
 | 
			
		||||
#define GRID_MATH_POKE_H
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/PerfCount.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
#include <PerfCount.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/PerfCount.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PERFCOUNT_H
 | 
			
		||||
#define GRID_PERFCOUNT_H
 | 
			
		||||
 | 
			
		||||
@@ -5,7 +34,7 @@
 | 
			
		||||
#include <ctime>
 | 
			
		||||
#include <chrono>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/ioctl.h>
 | 
			
		||||
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
@@ -14,8 +43,8 @@
 | 
			
		||||
#else
 | 
			
		||||
#include <sys/syscall.h>
 | 
			
		||||
#endif
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
			
		||||
@@ -29,6 +58,26 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef __bgq__
 | 
			
		||||
inline uint64_t cyclecount(void){ 
 | 
			
		||||
   uint64_t tmp;
 | 
			
		||||
   asm volatile ("mfspr %0,0x10C" : "=&r" (tmp)  );
 | 
			
		||||
   return tmp;
 | 
			
		||||
}
 | 
			
		||||
#elif defined __x86_64__
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
#ifndef __INTEL_COMPILER
 | 
			
		||||
#include <x86intrin.h>
 | 
			
		||||
#endif
 | 
			
		||||
inline uint64_t cyclecount(void){
 | 
			
		||||
   return __rdtsc();
 | 
			
		||||
}
 | 
			
		||||
#else
 | 
			
		||||
#warning No cycle counter implemented for this architecture
 | 
			
		||||
inline uint64_t cyclecount(void){ 
 | 
			
		||||
   return 0;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
class PerformanceCounter {
 | 
			
		||||
private:
 | 
			
		||||
@@ -73,7 +122,7 @@ public:
 | 
			
		||||
 | 
			
		||||
  long long count;
 | 
			
		||||
  int fd;
 | 
			
		||||
  uint64_t elapsed;
 | 
			
		||||
  unsigned long long elapsed;
 | 
			
		||||
  uint64_t begin;
 | 
			
		||||
 | 
			
		||||
  static int NumTypes(void){ 
 | 
			
		||||
@@ -117,10 +166,10 @@ public:
 | 
			
		||||
  {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    if ( fd!= -1) {
 | 
			
		||||
      ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 | 
			
		||||
      ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 | 
			
		||||
    }
 | 
			
		||||
    begin  =__rdtsc();
 | 
			
		||||
    begin  =cyclecount();
 | 
			
		||||
#else
 | 
			
		||||
    begin = 0;
 | 
			
		||||
#endif
 | 
			
		||||
@@ -130,10 +179,10 @@ public:
 | 
			
		||||
    count=0;
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    if ( fd!= -1) {
 | 
			
		||||
      ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 | 
			
		||||
      ::read(fd, &count, sizeof(long long));
 | 
			
		||||
    }
 | 
			
		||||
    elapsed = __rdtsc() - begin;
 | 
			
		||||
    elapsed = cyclecount() - begin;
 | 
			
		||||
#else
 | 
			
		||||
    elapsed = 0;
 | 
			
		||||
#endif
 | 
			
		||||
@@ -141,16 +190,16 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
  void Report(void) {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count);
 | 
			
		||||
    std::printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count);
 | 
			
		||||
#else
 | 
			
		||||
    printf("%llu cycles \n", elapsed );
 | 
			
		||||
    std::printf("%llu cycles \n", elapsed );
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ~PerformanceCounter()
 | 
			
		||||
  {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    close(fd);
 | 
			
		||||
    ::close(fd);
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										32
									
								
								lib/Simd.h
									
									
									
									
									
								
							
							
						
						
									
										32
									
								
								lib/Simd.h
									
									
									
									
									
								
							@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Simd.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_SIMD_H
 | 
			
		||||
#define GRID_SIMD_H
 | 
			
		||||
 | 
			
		||||
@@ -13,10 +42,13 @@
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define _MM_SELECT_FOUR_FOUR(A,B,C,D) ((A<<6)|(B<<4)|(C<<2)|(D))
 | 
			
		||||
#define _MM_SELECT_FOUR_FOUR_STRING(A,B,C,D) "((" #A "<<6)|(" #B "<<4)|(" #C "<<2)|(" #D "))"
 | 
			
		||||
#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) ((A<<7)|(B<<6)|(C<<5)|(D<<4)|(E<<3)|(F<<2)|(G<<4)|(H))
 | 
			
		||||
#define _MM_SELECT_FOUR_TWO (A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D)
 | 
			
		||||
#define _MM_SELECT_TWO_TWO  (A,B)     _MM_SELECT_FOUR_TWO(0,0,A,B)
 | 
			
		||||
 | 
			
		||||
#define RotateBit (0x100)
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  typedef uint32_t Integer;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										482
									
								
								lib/Stencil.h
									
									
									
									
									
								
							
							
						
						
									
										482
									
								
								lib/Stencil.h
									
									
									
									
									
								
							@@ -1,6 +1,35 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Stencil.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_STENCIL_H
 | 
			
		||||
#define GRID_STENCIL_H
 | 
			
		||||
 | 
			
		||||
#include <thread>
 | 
			
		||||
 | 
			
		||||
#include <stencil/Lebesgue.h>   // subdir aggregate
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -48,7 +77,7 @@ namespace Grid {
 | 
			
		||||
    int _around_the_world;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  template<class vobj,class cobj, class compressor>
 | 
			
		||||
  template<class vobj,class cobj>
 | 
			
		||||
  class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
@@ -57,10 +86,158 @@ namespace Grid {
 | 
			
		||||
      typedef typename cobj::scalar_type scalar_type;
 | 
			
		||||
      typedef typename cobj::scalar_object scalar_object;
 | 
			
		||||
 | 
			
		||||
      //////////////////////////////////////////
 | 
			
		||||
      // Comms packet queue for asynch thread
 | 
			
		||||
      //////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
      struct Packet {
 | 
			
		||||
	void * send_buf;
 | 
			
		||||
	void * recv_buf;
 | 
			
		||||
	Integer to_rank;
 | 
			
		||||
	Integer from_rank;
 | 
			
		||||
	Integer bytes;
 | 
			
		||||
	volatile Integer done;
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      std::vector<Packet> Packets;
 | 
			
		||||
 | 
			
		||||
#define SEND_IMMEDIATE
 | 
			
		||||
#define SERIAL_SENDS
 | 
			
		||||
 | 
			
		||||
      void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
 | 
			
		||||
	comms_bytes+=2.0*bytes;
 | 
			
		||||
#ifdef SEND_IMMEDIATE
 | 
			
		||||
	commtime-=usecond();
 | 
			
		||||
	_grid->SendToRecvFrom(xmit,to,rcv,from,bytes);
 | 
			
		||||
	commtime+=usecond();
 | 
			
		||||
#endif
 | 
			
		||||
	Packet p;
 | 
			
		||||
	p.send_buf = xmit;
 | 
			
		||||
	p.recv_buf = rcv;
 | 
			
		||||
	p.to_rank  = to;
 | 
			
		||||
	p.from_rank= from;
 | 
			
		||||
	p.bytes    = bytes;
 | 
			
		||||
	p.done     = 0;
 | 
			
		||||
	comms_bytes+=2.0*bytes;
 | 
			
		||||
	Packets.push_back(p);
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
#ifdef SERIAL_SENDS
 | 
			
		||||
      void Communicate(void ) { 
 | 
			
		||||
	commtime-=usecond();
 | 
			
		||||
	for(int i=0;i<Packets.size();i++){
 | 
			
		||||
#ifndef SEND_IMMEDIATE
 | 
			
		||||
	  _grid->SendToRecvFrom(
 | 
			
		||||
				Packets[i].send_buf,
 | 
			
		||||
				Packets[i].to_rank,
 | 
			
		||||
				Packets[i].recv_buf,
 | 
			
		||||
				Packets[i].from_rank,
 | 
			
		||||
				Packets[i].bytes);
 | 
			
		||||
#endif
 | 
			
		||||
	  Packets[i].done = 1;
 | 
			
		||||
	}
 | 
			
		||||
	commtime+=usecond();
 | 
			
		||||
      }
 | 
			
		||||
#else
 | 
			
		||||
      void Communicate(void ) { 
 | 
			
		||||
	typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
 | 
			
		||||
	std::vector<std::vector<CommsRequest_t> > reqs(Packets.size());
 | 
			
		||||
	commtime-=usecond();
 | 
			
		||||
	const int concurrency=2;
 | 
			
		||||
	for(int i=0;i<Packets.size();i+=concurrency){
 | 
			
		||||
	  for(int ii=0;ii<concurrency;ii++){
 | 
			
		||||
	    int j = i+ii;
 | 
			
		||||
	    if ( j<Packets.size() ) {
 | 
			
		||||
#ifndef SEND_IMMEDIATE
 | 
			
		||||
	      _grid->SendToRecvFromBegin(reqs[j],
 | 
			
		||||
					 Packets[j].send_buf,
 | 
			
		||||
					 Packets[j].to_rank,
 | 
			
		||||
					 Packets[j].recv_buf,
 | 
			
		||||
					 Packets[j].from_rank,
 | 
			
		||||
					 Packets[j].bytes);
 | 
			
		||||
#endif
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  for(int ii=0;ii<concurrency;ii++){
 | 
			
		||||
	    int j = i+ii;
 | 
			
		||||
	    if ( j<Packets.size() ) {
 | 
			
		||||
#ifndef SEND_IMMEDIATE
 | 
			
		||||
	      _grid->SendToRecvFromComplete(reqs[i]);
 | 
			
		||||
#endif
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  for(int ii=0;ii<concurrency;ii++){
 | 
			
		||||
	    int j = i+ii;
 | 
			
		||||
	    if ( j<Packets.size() ) {
 | 
			
		||||
	      Packets[j].done = 1;
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	commtime+=usecond();
 | 
			
		||||
      }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
      ///////////////////////////////////////////
 | 
			
		||||
      // Simd merge queue for asynch comms
 | 
			
		||||
      ///////////////////////////////////////////
 | 
			
		||||
      struct Merge {
 | 
			
		||||
        cobj * mpointer;
 | 
			
		||||
	std::vector<scalar_object *> rpointers;
 | 
			
		||||
	Integer buffer_size;
 | 
			
		||||
	Integer packet_id;
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      std::vector<Merge> Mergers;
 | 
			
		||||
 | 
			
		||||
      void AddMerge(cobj *merge_p,std::vector<scalar_object *> &rpointers,Integer buffer_size,Integer packet_id) {
 | 
			
		||||
	Merge m;
 | 
			
		||||
	m.mpointer = merge_p;
 | 
			
		||||
	m.rpointers= rpointers;
 | 
			
		||||
	m.buffer_size = buffer_size;
 | 
			
		||||
	m.packet_id   = packet_id;
 | 
			
		||||
#ifdef SEND_IMMEDIATE
 | 
			
		||||
	mergetime-=usecond();
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
        for(int o=0;o<m.buffer_size;o++){
 | 
			
		||||
	  merge1(m.mpointer[o],m.rpointers,o);
 | 
			
		||||
	}
 | 
			
		||||
	mergetime+=usecond();
 | 
			
		||||
#else
 | 
			
		||||
	Mergers.push_back(m);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      void CommsMerge(void ) { 
 | 
			
		||||
	//PARALLEL_NESTED_LOOP2 
 | 
			
		||||
	for(int i=0;i<Mergers.size();i++){	
 | 
			
		||||
	  
 | 
			
		||||
	spintime-=usecond();
 | 
			
		||||
	int packet_id = Mergers[i].packet_id;
 | 
			
		||||
	while(! Packets[packet_id].done ); // spin for completion
 | 
			
		||||
	spintime+=usecond();
 | 
			
		||||
 | 
			
		||||
#ifndef SEND_IMMEDIATE
 | 
			
		||||
	mergetime-=usecond();
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
	  for(int o=0;o<Mergers[i].buffer_size;o++){
 | 
			
		||||
	    merge1(Mergers[i].mpointer[o],Mergers[i].rpointers,o);
 | 
			
		||||
	  }
 | 
			
		||||
	mergetime+=usecond();
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      ////////////////////////////////////////
 | 
			
		||||
      // Basic Grid and stencil info
 | 
			
		||||
      ////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
      int                               _checkerboard;
 | 
			
		||||
      int                               _npoints; // Move to template param?
 | 
			
		||||
      GridBase *                        _grid;
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
      // npoints of these
 | 
			
		||||
      std::vector<int>                  _directions;
 | 
			
		||||
      std::vector<int>                  _distances;
 | 
			
		||||
@@ -69,32 +246,32 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      // npoints x Osites() of these
 | 
			
		||||
      std::vector<std::vector<StencilEntry> > _entries;
 | 
			
		||||
 | 
			
		||||
      // Comms buffers
 | 
			
		||||
      std::vector<std::vector<scalar_object> > send_buf_extract;
 | 
			
		||||
      std::vector<std::vector<scalar_object> > recv_buf_extract;
 | 
			
		||||
      std::vector<scalar_object *> pointers;
 | 
			
		||||
      std::vector<scalar_object *> rpointers;
 | 
			
		||||
      Vector<cobj> send_buf;
 | 
			
		||||
 | 
			
		||||
      inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point][osite]; }
 | 
			
		||||
 | 
			
		||||
      // Comms buffers
 | 
			
		||||
      std::vector<Vector<scalar_object> > u_simd_send_buf;
 | 
			
		||||
      std::vector<Vector<scalar_object> > u_simd_recv_buf;
 | 
			
		||||
      Vector<cobj>          u_send_buf;
 | 
			
		||||
      Vector<cobj>          comm_buf;
 | 
			
		||||
      int u_comm_offset;
 | 
			
		||||
      int _unified_buffer_size;
 | 
			
		||||
      int _request_count;
 | 
			
		||||
 | 
			
		||||
      double buftime;
 | 
			
		||||
      /////////////////////////////////////////
 | 
			
		||||
      // Timing info; ugly; possibly temporary
 | 
			
		||||
      /////////////////////////////////////////
 | 
			
		||||
#define TIMING_HACK
 | 
			
		||||
#ifdef TIMING_HACK
 | 
			
		||||
      double jointime;
 | 
			
		||||
      double gathertime;
 | 
			
		||||
      double commtime;
 | 
			
		||||
      double commstime;
 | 
			
		||||
      double halotime;
 | 
			
		||||
      double scattertime;
 | 
			
		||||
      double halogtime;
 | 
			
		||||
      double mergetime;
 | 
			
		||||
      double spintime;
 | 
			
		||||
      double comms_bytes;
 | 
			
		||||
      double gathermtime;
 | 
			
		||||
      double splicetime;
 | 
			
		||||
      double nosplicetime;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  CartesianStencil(GridBase *grid,
 | 
			
		||||
				     int npoints,
 | 
			
		||||
@@ -103,28 +280,29 @@ namespace Grid {
 | 
			
		||||
				     const std::vector<int> &distances) 
 | 
			
		||||
    :   _entries(npoints), _permute_type(npoints), _comm_buf_size(npoints)
 | 
			
		||||
    {
 | 
			
		||||
#ifdef TIMING_HACK
 | 
			
		||||
      gathertime=0;
 | 
			
		||||
      jointime=0;
 | 
			
		||||
      commtime=0;
 | 
			
		||||
      commstime=0;
 | 
			
		||||
      halotime=0;
 | 
			
		||||
      scattertime=0;
 | 
			
		||||
      halogtime=0;
 | 
			
		||||
      mergetime=0;
 | 
			
		||||
      spintime=0;
 | 
			
		||||
      gathermtime=0;
 | 
			
		||||
      buftime=0;
 | 
			
		||||
      splicetime=0;
 | 
			
		||||
      nosplicetime=0;
 | 
			
		||||
 | 
			
		||||
      comms_bytes=0;
 | 
			
		||||
#endif
 | 
			
		||||
      _npoints = npoints;
 | 
			
		||||
      _grid    = grid;
 | 
			
		||||
      _directions = directions;
 | 
			
		||||
      _distances  = distances;
 | 
			
		||||
      _unified_buffer_size=0;
 | 
			
		||||
      _request_count =0;
 | 
			
		||||
 | 
			
		||||
      int osites  = _grid->oSites();
 | 
			
		||||
 | 
			
		||||
      for(int i=0;i<npoints;i++){
 | 
			
		||||
      for(int ii=0;ii<npoints;ii++){
 | 
			
		||||
 | 
			
		||||
	int i = ii; // reverse direction to get SIMD comms done first
 | 
			
		||||
	int point = i;
 | 
			
		||||
 | 
			
		||||
	_entries[i].resize( osites);
 | 
			
		||||
@@ -143,6 +321,9 @@ namespace Grid {
 | 
			
		||||
	int simd_layout     = _grid->_simd_layout[dimension];
 | 
			
		||||
	int comm_dim        = _grid->_processors[dimension] >1 ;
 | 
			
		||||
	int splice_dim      = _grid->_simd_layout[dimension]>1 && (comm_dim);
 | 
			
		||||
	int rotate_dim      = _grid->_simd_layout[dimension]>2;
 | 
			
		||||
 | 
			
		||||
	assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported
 | 
			
		||||
 | 
			
		||||
	int sshift[2];
 | 
			
		||||
	
 | 
			
		||||
@@ -163,30 +344,35 @@ namespace Grid {
 | 
			
		||||
	  //        So tables are the same whether comm_dim or splice_dim
 | 
			
		||||
	  sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
 | 
			
		||||
	  sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
 | 
			
		||||
 | 
			
		||||
	  if ( sshift[0] == sshift[1] ) {
 | 
			
		||||
	    Comms(point,dimension,shift,0x3);
 | 
			
		||||
	    //	    std::cout<<"Comms 0x3"<<std::endl;
 | 
			
		||||
	  } else {
 | 
			
		||||
	    Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
 | 
			
		||||
	    Comms(point,dimension,shift,0x2);// both with block stride loop iteration
 | 
			
		||||
	    //	    std::cout<<"Comms 0x1 ; 0x2"<<std::endl;
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	//	for(int ss=0;ss<osites;ss++){
 | 
			
		||||
	  //	  std::cout << "point["<<i<<"] "<<ss<<"-> o"<<_entries[i][ss]._offset<<"; l"<<
 | 
			
		||||
	  //	    _entries[i][ss]._is_local<<"; p"<<_entries[i][ss]._permute<<std::endl;
 | 
			
		||||
	//	}
 | 
			
		||||
      }
 | 
			
		||||
      u_send_buf.resize(_unified_buffer_size);
 | 
			
		||||
      comm_buf.resize(_unified_buffer_size);
 | 
			
		||||
      
 | 
			
		||||
      const int Nsimd = grid->Nsimd();
 | 
			
		||||
      u_simd_send_buf.resize(Nsimd);
 | 
			
		||||
      u_simd_recv_buf.resize(Nsimd);
 | 
			
		||||
      for(int l=0;l<Nsimd;l++){
 | 
			
		||||
	u_simd_send_buf[l].resize(_unified_buffer_size);
 | 
			
		||||
	u_simd_recv_buf[l].resize(_unified_buffer_size);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    void Local     (int point, int dimension,int shiftpm,int cbmask)
 | 
			
		||||
    {
 | 
			
		||||
      int fd = _grid->_fdimensions[dimension];
 | 
			
		||||
      int rd = _grid->_rdimensions[dimension];
 | 
			
		||||
      int ld = _grid->_ldimensions[dimension];
 | 
			
		||||
      int gd = _grid->_gdimensions[dimension];
 | 
			
		||||
      
 | 
			
		||||
      int ly = _grid->_simd_layout[dimension];
 | 
			
		||||
 | 
			
		||||
      // Map to always positive shift modulo global full dimension.
 | 
			
		||||
      int shift = (shiftpm+fd)%fd;
 | 
			
		||||
      
 | 
			
		||||
@@ -210,14 +396,13 @@ namespace Grid {
 | 
			
		||||
	if ( (shiftpm== 1) && (sx<x)  ) {
 | 
			
		||||
	  wraparound = 1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	  
 | 
			
		||||
	int permute_slice=0;
 | 
			
		||||
	if(permute_dim){
 | 
			
		||||
	  int wrap = sshift/rd;
 | 
			
		||||
	  int  num = sshift%rd;
 | 
			
		||||
	  if ( x< rd-num ) permute_slice=wrap;
 | 
			
		||||
	  else permute_slice = 1-wrap;
 | 
			
		||||
	  else permute_slice = (wrap+1)%ly;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
  	CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
 | 
			
		||||
@@ -228,6 +413,7 @@ namespace Grid {
 | 
			
		||||
    void Comms     (int point,int dimension,int shiftpm,int cbmask)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid=_grid;
 | 
			
		||||
      const int Nsimd = grid->Nsimd();
 | 
			
		||||
      
 | 
			
		||||
      int fd              = _grid->_fdimensions[dimension];
 | 
			
		||||
      int ld              = _grid->_ldimensions[dimension];
 | 
			
		||||
@@ -236,27 +422,47 @@ namespace Grid {
 | 
			
		||||
      int simd_layout     = _grid->_simd_layout[dimension];
 | 
			
		||||
      int comm_dim        = _grid->_processors[dimension] >1 ;
 | 
			
		||||
      
 | 
			
		||||
      //      assert(simd_layout==1); // Why?
 | 
			
		||||
      assert(comm_dim==1);
 | 
			
		||||
      int shift = (shiftpm + fd) %fd;
 | 
			
		||||
      assert(shift>=0);
 | 
			
		||||
      assert(shift<fd);
 | 
			
		||||
      
 | 
			
		||||
      int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
      int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; // done in reduced dims, so SIMD factored
 | 
			
		||||
 | 
			
		||||
      _comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and
 | 
			
		||||
                                           // send to one or more remote nodes.
 | 
			
		||||
 | 
			
		||||
      int cb= (cbmask==0x2)? Odd : Even;
 | 
			
		||||
      int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
      for(int x=0;x<rd;x++){       
 | 
			
		||||
 | 
			
		||||
	int sx        =  (x+sshift)%rd;
 | 
			
		||||
	int comm_proc = ((x+sshift)/rd)%pd;
 | 
			
		||||
    	int offnode = (comm_proc!= 0);
 | 
			
		||||
	int permute_type=grid->PermuteType(dimension);
 | 
			
		||||
 | 
			
		||||
	int sx        =  (x+sshift)%rd;
 | 
			
		||||
	
 | 
			
		||||
    	int offnode = 0;
 | 
			
		||||
	if ( simd_layout > 1 ) {
 | 
			
		||||
 | 
			
		||||
	  for(int i=0;i<Nsimd;i++){
 | 
			
		||||
 | 
			
		||||
	    int inner_bit = (Nsimd>>(permute_type+1));
 | 
			
		||||
	    int ic= (i&inner_bit)? 1:0;
 | 
			
		||||
	    int my_coor          = rd*ic + x;
 | 
			
		||||
	    int nbr_coor         = my_coor+sshift;
 | 
			
		||||
	    int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
 | 
			
		||||
 | 
			
		||||
	    if ( nbr_proc ) { 
 | 
			
		||||
	      offnode =1;
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  
 | 
			
		||||
	} else { 
 | 
			
		||||
	  int comm_proc = ((x+sshift)/rd)%pd;
 | 
			
		||||
	  offnode = (comm_proc!= 0);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	//	std::cout << "Stencil shift "<<shift<<" sshift "<<sshift<<" fd "<<fd<<" rd " <<rd<<" offnode "<<offnode<<" sx "<<sx<<std::endl;
 | 
			
		||||
	int wraparound=0;
 | 
			
		||||
	if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
 | 
			
		||||
	  wraparound = 1;
 | 
			
		||||
@@ -274,14 +480,13 @@ namespace Grid {
 | 
			
		||||
	  int words = buffer_size;
 | 
			
		||||
	  if (cbmask != 0x3) words=words>>1;
 | 
			
		||||
	  
 | 
			
		||||
	  //	  GatherPlaneSimple (point,dimension,sx,cbmask);
 | 
			
		||||
	  
 | 
			
		||||
	  int rank           = grid->_processor;
 | 
			
		||||
	  int recv_from_rank;
 | 
			
		||||
	  int xmit_to_rank;
 | 
			
		||||
 | 
			
		||||
	  int unified_buffer_offset = _unified_buffer_size;
 | 
			
		||||
	  _unified_buffer_size    += words;
 | 
			
		||||
 | 
			
		||||
	  ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
 | 
			
		||||
	  
 | 
			
		||||
	}
 | 
			
		||||
@@ -327,7 +532,7 @@ namespace Grid {
 | 
			
		||||
	      _entries[point][lo+o+b]._around_the_world=wrap;
 | 
			
		||||
	    }
 | 
			
		||||
	    
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  o +=_grid->_slice_stride[dimension];
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
@@ -377,35 +582,36 @@ namespace Grid {
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
//      CartesianStencil(GridBase *grid,
 | 
			
		||||
//		       int npoints,
 | 
			
		||||
//		       int checkerboard,
 | 
			
		||||
//		       const std::vector<int> &directions,
 | 
			
		||||
//		       const std::vector<int> &distances);
 | 
			
		||||
 | 
			
		||||
      template<class compressor>
 | 
			
		||||
      std::thread HaloExchangeBegin(const Lattice<vobj> &source,compressor &compress) {
 | 
			
		||||
	Mergers.resize(0); 
 | 
			
		||||
	Packets.resize(0);
 | 
			
		||||
	HaloGather(source,compress);
 | 
			
		||||
        return std::thread([&] { this->Communicate(); });
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // Add to tables for various cases;  is this mistaken. only local if 1 proc in dim
 | 
			
		||||
      // Can this be avoided with simpler coding of comms?
 | 
			
		||||
   //      void Local     (int point, int dimension,int shift,int cbmask);
 | 
			
		||||
   //      void Comms     (int point, int dimension,int shift,int cbmask);
 | 
			
		||||
   //      void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap);
 | 
			
		||||
   //      void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset,int wrap);
 | 
			
		||||
 | 
			
		||||
      // Could allow a functional munging of the halo to another type during the comms.
 | 
			
		||||
      // this could implement the 16bit/32bit/64bit compression.
 | 
			
		||||
      void HaloExchange(const Lattice<vobj> &source,std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,compressor &compress)
 | 
			
		||||
      template<class compressor>
 | 
			
		||||
      void HaloExchange(const Lattice<vobj> &source,compressor &compress) 
 | 
			
		||||
      {
 | 
			
		||||
	// conformable(source._grid,_grid);
 | 
			
		||||
	assert(source._grid==_grid);
 | 
			
		||||
	halotime-=usecond();
 | 
			
		||||
	if (u_comm_buf.size() != _unified_buffer_size ) u_comm_buf.resize(_unified_buffer_size);
 | 
			
		||||
	int u_comm_offset=0;
 | 
			
		||||
	Mergers.resize(0); 
 | 
			
		||||
	Packets.resize(0);
 | 
			
		||||
	HaloGather(source,compress);
 | 
			
		||||
	Communicate();
 | 
			
		||||
	CommsMerge();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
	// Gather all comms buffers
 | 
			
		||||
	for(int point = 0 ; point < _npoints; point++) {
 | 
			
		||||
 | 
			
		||||
	  compress.Point(point);
 | 
			
		||||
      void HaloExchangeComplete(std::thread &thr) 
 | 
			
		||||
      {
 | 
			
		||||
	CommsMerge(); // spins
 | 
			
		||||
	jointime-=usecond();
 | 
			
		||||
	thr.join();
 | 
			
		||||
	jointime+=usecond();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      template<class compressor>
 | 
			
		||||
      void HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point)
 | 
			
		||||
      {
 | 
			
		||||
	  int dimension    = _directions[point];
 | 
			
		||||
	  int displacement = _distances[point];
 | 
			
		||||
	  
 | 
			
		||||
@@ -432,35 +638,51 @@ namespace Grid {
 | 
			
		||||
	    if ( sshift[0] == sshift[1] ) {
 | 
			
		||||
	      if (splice_dim) {
 | 
			
		||||
		splicetime-=usecond();
 | 
			
		||||
		GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
 | 
			
		||||
		GatherSimd(source,dimension,shift,0x3,compress);
 | 
			
		||||
		splicetime+=usecond();
 | 
			
		||||
	      } else { 
 | 
			
		||||
		nosplicetime-=usecond();
 | 
			
		||||
		GatherStartComms(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
 | 
			
		||||
		Gather(source,dimension,shift,0x3,compress);
 | 
			
		||||
		nosplicetime+=usecond();
 | 
			
		||||
	      }
 | 
			
		||||
	    } else {
 | 
			
		||||
	      std::cout << "dim "<<dimension<<"cb "<<_checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
 | 
			
		||||
	      if(splice_dim){
 | 
			
		||||
		splicetime-=usecond();
 | 
			
		||||
		GatherStartCommsSimd(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);// if checkerboard is unfavourable take two passes
 | 
			
		||||
		GatherStartCommsSimd(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);// both with block stride loop iteration
 | 
			
		||||
		GatherSimd(source,dimension,shift,0x1,compress);// if checkerboard is unfavourable take two passes
 | 
			
		||||
		GatherSimd(source,dimension,shift,0x2,compress);// both with block stride loop iteration
 | 
			
		||||
		splicetime+=usecond();
 | 
			
		||||
	      } else {
 | 
			
		||||
		nosplicetime-=usecond();
 | 
			
		||||
		GatherStartComms(source,dimension,shift,0x1,u_comm_buf,u_comm_offset,compress);
 | 
			
		||||
		GatherStartComms(source,dimension,shift,0x2,u_comm_buf,u_comm_offset,compress);
 | 
			
		||||
		Gather(source,dimension,shift,0x1,compress);
 | 
			
		||||
		Gather(source,dimension,shift,0x2,compress);
 | 
			
		||||
		nosplicetime+=usecond();
 | 
			
		||||
	      }
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	halotime+=usecond();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
        void GatherStartComms(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
 | 
			
		||||
			      std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
 | 
			
		||||
			      int &u_comm_offset,compressor & compress)
 | 
			
		||||
      template<class compressor>
 | 
			
		||||
      void HaloGather(const Lattice<vobj> &source,compressor &compress)
 | 
			
		||||
      {
 | 
			
		||||
	// conformable(source._grid,_grid);
 | 
			
		||||
	assert(source._grid==_grid);
 | 
			
		||||
	halogtime-=usecond();
 | 
			
		||||
 | 
			
		||||
	assert (comm_buf.size() == _unified_buffer_size );
 | 
			
		||||
	u_comm_offset=0;
 | 
			
		||||
 | 
			
		||||
	// Gather all comms buffers
 | 
			
		||||
	for(int point = 0 ; point < _npoints; point++) {
 | 
			
		||||
	  compress.Point(point);
 | 
			
		||||
	  HaloGatherDir(source,compress,point);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	assert(u_comm_offset==_unified_buffer_size);
 | 
			
		||||
	halogtime+=usecond();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      template<class compressor>
 | 
			
		||||
        void Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress)
 | 
			
		||||
	{
 | 
			
		||||
	  typedef typename cobj::vector_type vector_type;
 | 
			
		||||
	  typedef typename cobj::scalar_type scalar_type;
 | 
			
		||||
@@ -481,8 +703,6 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
	  int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
	  if(send_buf.size()<buffer_size) send_buf.resize(buffer_size);
 | 
			
		||||
 | 
			
		||||
	  int cb= (cbmask==0x2)? Odd : Even;
 | 
			
		||||
	  int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 | 
			
		||||
 | 
			
		||||
@@ -499,38 +719,33 @@ namespace Grid {
 | 
			
		||||
	      int bytes = words * sizeof(cobj);
 | 
			
		||||
 | 
			
		||||
	      gathertime-=usecond();
 | 
			
		||||
	      Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask,compress);
 | 
			
		||||
	      Gather_plane_simple (rhs,u_send_buf,dimension,sx,cbmask,compress,u_comm_offset);
 | 
			
		||||
	      gathertime+=usecond();
 | 
			
		||||
 | 
			
		||||
	      int rank           = _grid->_processor;
 | 
			
		||||
	      int recv_from_rank;
 | 
			
		||||
	      int xmit_to_rank;
 | 
			
		||||
	      _grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
 | 
			
		||||
	      assert (xmit_to_rank != _grid->ThisRank());
 | 
			
		||||
	      assert (xmit_to_rank   != _grid->ThisRank());
 | 
			
		||||
	      assert (recv_from_rank != _grid->ThisRank());
 | 
			
		||||
 | 
			
		||||
	      //      FIXME Implement asynchronous send & also avoid buffer copy
 | 
			
		||||
	      commtime-=usecond();
 | 
			
		||||
	      _grid->SendToRecvFrom((void *)&send_buf[0],
 | 
			
		||||
				   xmit_to_rank,
 | 
			
		||||
				    (void *)&u_comm_buf[u_comm_offset],
 | 
			
		||||
				   recv_from_rank,
 | 
			
		||||
				   bytes);
 | 
			
		||||
	      commtime+=usecond();
 | 
			
		||||
 | 
			
		||||
	      AddPacket((void *)&u_send_buf[u_comm_offset],
 | 
			
		||||
			(void *)  &comm_buf[u_comm_offset],
 | 
			
		||||
			xmit_to_rank,
 | 
			
		||||
			recv_from_rank,
 | 
			
		||||
			bytes);
 | 
			
		||||
			
 | 
			
		||||
	      u_comm_offset+=words;
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	void  GatherStartCommsSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,
 | 
			
		||||
				   std::vector<cobj,alignedAllocator<cobj> > &u_comm_buf,
 | 
			
		||||
				   int &u_comm_offset,compressor &compress)
 | 
			
		||||
      template<class compressor>
 | 
			
		||||
	void  GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress)
 | 
			
		||||
	{
 | 
			
		||||
	  buftime-=usecond();
 | 
			
		||||
	  const int Nsimd = _grid->Nsimd();
 | 
			
		||||
 | 
			
		||||
	  
 | 
			
		||||
	  int fd = _grid->_fdimensions[dimension];
 | 
			
		||||
	  int rd = _grid->_rdimensions[dimension];
 | 
			
		||||
@@ -540,6 +755,7 @@ namespace Grid {
 | 
			
		||||
	  int comm_dim        = _grid->_processors[dimension] >1 ;
 | 
			
		||||
 | 
			
		||||
	  assert(comm_dim==1);
 | 
			
		||||
	  // This will not work with a rotate dim
 | 
			
		||||
	  assert(simd_layout==2);
 | 
			
		||||
	  assert(shift>=0);
 | 
			
		||||
	  assert(shift<fd);
 | 
			
		||||
@@ -554,22 +770,11 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
	  assert(cbmask==0x3); // Fixme think there is a latent bug if not true
 | 
			
		||||
 | 
			
		||||
	  //	Should grow to max size and then cost very little thereafter
 | 
			
		||||
	  send_buf_extract.resize(Nsimd);
 | 
			
		||||
	  recv_buf_extract.resize(Nsimd);
 | 
			
		||||
	  for(int l=0;l<Nsimd;l++){
 | 
			
		||||
	    if( send_buf_extract[l].size() < buffer_size) {
 | 
			
		||||
	      send_buf_extract[l].resize(buffer_size);
 | 
			
		||||
	      recv_buf_extract[l].resize(buffer_size);
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	  pointers.resize(Nsimd);
 | 
			
		||||
	  rpointers.resize(Nsimd);
 | 
			
		||||
 | 
			
		||||
	  int bytes = buffer_size*sizeof(scalar_object);
 | 
			
		||||
	  
 | 
			
		||||
	  buftime+=usecond();
 | 
			
		||||
	  
 | 
			
		||||
 | 
			
		||||
	  std::vector<scalar_object *> rpointers(Nsimd);
 | 
			
		||||
	  std::vector<scalar_object *> spointers(Nsimd);
 | 
			
		||||
 | 
			
		||||
	  ///////////////////////////////////////////
 | 
			
		||||
	  // Work out what to send where
 | 
			
		||||
	  ///////////////////////////////////////////
 | 
			
		||||
@@ -585,16 +790,20 @@ namespace Grid {
 | 
			
		||||
	    if ( any_offnode ) {
 | 
			
		||||
 | 
			
		||||
	      for(int i=0;i<Nsimd;i++){       
 | 
			
		||||
		pointers[i] = &send_buf_extract[i][0];
 | 
			
		||||
		spointers[i] = &u_simd_send_buf[i][u_comm_offset];
 | 
			
		||||
	      }
 | 
			
		||||
 | 
			
		||||
	      int sx   = (x+sshift)%rd;
 | 
			
		||||
	      
 | 
			
		||||
	      gathermtime-=usecond();
 | 
			
		||||
	      Gather_plane_extract<cobj>(rhs,pointers,dimension,sx,cbmask,compress);
 | 
			
		||||
	      Gather_plane_extract<cobj>(rhs,spointers,dimension,sx,cbmask,compress);
 | 
			
		||||
	      gathermtime+=usecond();
 | 
			
		||||
 | 
			
		||||
	      for(int i=0;i<Nsimd;i++){
 | 
			
		||||
		
 | 
			
		||||
		// FIXME 
 | 
			
		||||
		// This logic is hard coded to simd_layout ==2 and not allowing >2
 | 
			
		||||
		//		std::cout << "GatherSimd : lane 1st elem " << i << u_simd_send_buf[i ][u_comm_offset]<<std::endl;
 | 
			
		||||
 | 
			
		||||
		int inner_bit = (Nsimd>>(permute_type+1));
 | 
			
		||||
		int ic= (i&inner_bit)? 1:0;
 | 
			
		||||
@@ -607,44 +816,41 @@ namespace Grid {
 | 
			
		||||
		int nbr_ox   = (nbr_lcoor%rd);    // outer coord of peer
 | 
			
		||||
		int nbr_lane = (i&(~inner_bit));
 | 
			
		||||
		
 | 
			
		||||
		int recv_from_rank;
 | 
			
		||||
		int xmit_to_rank;
 | 
			
		||||
		
 | 
			
		||||
		if (nbr_ic) nbr_lane|=inner_bit;
 | 
			
		||||
		assert (sx == nbr_ox);
 | 
			
		||||
 | 
			
		||||
		
 | 
			
		||||
		auto rp = &u_simd_recv_buf[i       ][u_comm_offset];
 | 
			
		||||
		auto sp = &u_simd_send_buf[nbr_lane][u_comm_offset];
 | 
			
		||||
 | 
			
		||||
		void *vrp = (void *)rp;
 | 
			
		||||
		void *vsp = (void *)sp;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		if(nbr_proc){
 | 
			
		||||
		  
 | 
			
		||||
		  int recv_from_rank;
 | 
			
		||||
		  int xmit_to_rank;
 | 
			
		||||
 | 
			
		||||
		  _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); 
 | 
			
		||||
		  
 | 
			
		||||
		  commstime-=usecond();
 | 
			
		||||
		  _grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
 | 
			
		||||
					xmit_to_rank,
 | 
			
		||||
					(void *)&recv_buf_extract[i][0],
 | 
			
		||||
					recv_from_rank,
 | 
			
		||||
					bytes);
 | 
			
		||||
		  commstime+=usecond();
 | 
			
		||||
		  AddPacket( vsp,vrp,xmit_to_rank,recv_from_rank,bytes);
 | 
			
		||||
		  
 | 
			
		||||
		  rpointers[i] = &recv_buf_extract[i][0];
 | 
			
		||||
		  rpointers[i] = rp;
 | 
			
		||||
 | 
			
		||||
		} else { 
 | 
			
		||||
		  rpointers[i] = &send_buf_extract[nbr_lane][0];
 | 
			
		||||
 | 
			
		||||
		  rpointers[i] = sp;
 | 
			
		||||
 | 
			
		||||
		}
 | 
			
		||||
	      }
 | 
			
		||||
 | 
			
		||||
	      // Here we don't want to scatter, just place into a buffer.
 | 
			
		||||
	      mergetime-=usecond();
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
	      for(int i=0;i<buffer_size;i++){
 | 
			
		||||
		//		assert(u_comm_offset+i<_unified_buffer_size);
 | 
			
		||||
		merge(u_comm_buf[u_comm_offset+i],rpointers,i);
 | 
			
		||||
	      }
 | 
			
		||||
	      mergetime+=usecond();
 | 
			
		||||
	      u_comm_offset+=buffer_size;
 | 
			
		||||
	      AddMerge(&comm_buf[u_comm_offset],rpointers,buffer_size,Packets.size()-1);
 | 
			
		||||
 | 
			
		||||
	      u_comm_offset     +=buffer_size;
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Tensors.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATH_H
 | 
			
		||||
#define GRID_MATH_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Threads.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_THREADS_H
 | 
			
		||||
#define GRID_THREADS_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										46
									
								
								lib/Timer.h
									
									
									
									
									
								
							
							
						
						
									
										46
									
								
								lib/Timer.h
									
									
									
									
									
								
							@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/Timer.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_TIME_H
 | 
			
		||||
#define GRID_TIME_H
 | 
			
		||||
 | 
			
		||||
@@ -16,13 +44,19 @@ double usecond(void);
 | 
			
		||||
typedef  std::chrono::system_clock          GridClock;
 | 
			
		||||
typedef  std::chrono::time_point<GridClock> GridTimePoint;
 | 
			
		||||
typedef  std::chrono::milliseconds          GridTime;
 | 
			
		||||
typedef  std::chrono::microseconds          GridUsecs;
 | 
			
		||||
 | 
			
		||||
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
 | 
			
		||||
{
 | 
			
		||||
  stream << time.count()<<" ms";
 | 
			
		||||
  return stream;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
class GridStopWatch {
 | 
			
		||||
private:
 | 
			
		||||
  bool running;
 | 
			
		||||
  GridTimePoint start;
 | 
			
		||||
  GridTime accumulator;
 | 
			
		||||
  GridUsecs accumulator;
 | 
			
		||||
public:
 | 
			
		||||
  GridStopWatch () { 
 | 
			
		||||
    Reset();
 | 
			
		||||
@@ -34,17 +68,21 @@ public:
 | 
			
		||||
  }
 | 
			
		||||
  void     Stop(void)  { 
 | 
			
		||||
    assert(running == true);
 | 
			
		||||
    accumulator+= std::chrono::duration_cast<GridTime>(GridClock::now()-start); 
 | 
			
		||||
    accumulator+= std::chrono::duration_cast<GridUsecs>(GridClock::now()-start); 
 | 
			
		||||
    running = false; 
 | 
			
		||||
  };
 | 
			
		||||
  void     Reset(void){
 | 
			
		||||
    running = false;
 | 
			
		||||
    start = GridClock::now();
 | 
			
		||||
    accumulator = std::chrono::duration_cast<GridTime>(start-start); 
 | 
			
		||||
    accumulator = std::chrono::duration_cast<GridUsecs>(start-start); 
 | 
			
		||||
  }
 | 
			
		||||
  GridTime Elapsed(void) {
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
    return accumulator;
 | 
			
		||||
    return std::chrono::duration_cast<GridTime>( accumulator );
 | 
			
		||||
  }
 | 
			
		||||
  uint64_t useconds(void){
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
    return (uint64_t) accumulator.count();
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,33 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/CoarsenedMatrix.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_ALGORITHM_COARSENED_MATRIX_H
 | 
			
		||||
#define  GRID_ALGORITHM_COARSENED_MATRIX_H
 | 
			
		||||
 | 
			
		||||
@@ -117,6 +147,56 @@ namespace Grid {
 | 
			
		||||
      }
 | 
			
		||||
      Orthogonalise();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    virtual void CreateSubspaceLanczos(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) 
 | 
			
		||||
    {
 | 
			
		||||
      // Run a Lanczos with sloppy convergence
 | 
			
		||||
	const int Nstop = nn;
 | 
			
		||||
	const int Nk = nn+20;
 | 
			
		||||
	const int Np = nn+20;
 | 
			
		||||
	const int Nm = Nk+Np;
 | 
			
		||||
	const int MaxIt= 10000;
 | 
			
		||||
	RealD resid = 1.0e-3;
 | 
			
		||||
 | 
			
		||||
	Chebyshev<FineField> Cheb(0.5,64.0,21);
 | 
			
		||||
	ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
 | 
			
		||||
	//	IRL.lock = 1;
 | 
			
		||||
 | 
			
		||||
	FineField noise(FineGrid); gaussian(RNG,noise);
 | 
			
		||||
	FineField tmp(FineGrid); 
 | 
			
		||||
	std::vector<RealD>     eval(Nm);
 | 
			
		||||
	std::vector<FineField> evec(Nm,FineGrid);
 | 
			
		||||
 | 
			
		||||
	int Nconv;
 | 
			
		||||
	IRL.calc(eval,evec,
 | 
			
		||||
		 noise,
 | 
			
		||||
		 Nconv);
 | 
			
		||||
 | 
			
		||||
    	// pull back nn vectors
 | 
			
		||||
	for(int b=0;b<nn;b++){
 | 
			
		||||
 | 
			
		||||
	  subspace[b]   = evec[b];
 | 
			
		||||
 | 
			
		||||
	  std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
 | 
			
		||||
 | 
			
		||||
	  hermop.Op(subspace[b],tmp); 
 | 
			
		||||
	  std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
 | 
			
		||||
 | 
			
		||||
	  noise = tmp -  sqrt(eval[b])*subspace[b] ;
 | 
			
		||||
 | 
			
		||||
	  std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<"  ;  [ M - Lambda ]_"<<b<<" vec_"<<b<<"  = " <<norm2(noise)<<std::endl;
 | 
			
		||||
 | 
			
		||||
	  noise = tmp +  eval[b]*subspace[b] ;
 | 
			
		||||
 | 
			
		||||
	  std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<"  ;  [ M - Lambda ]_"<<b<<" vec_"<<b<<"  = " <<norm2(noise)<<std::endl;
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
	Orthogonalise();
 | 
			
		||||
	for(int b=0;b<nn;b++){
 | 
			
		||||
	  std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
 | 
			
		||||
	}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
 | 
			
		||||
 | 
			
		||||
      RealD scale;
 | 
			
		||||
@@ -170,11 +250,10 @@ namespace Grid {
 | 
			
		||||
    ////////////////////
 | 
			
		||||
    Geometry         geom;
 | 
			
		||||
    GridBase *       _grid; 
 | 
			
		||||
    CartesianStencil<siteVector,siteVector,SimpleCompressor<siteVector> > Stencil; 
 | 
			
		||||
    CartesianStencil<siteVector,siteVector> Stencil; 
 | 
			
		||||
 | 
			
		||||
    std::vector<CoarseMatrix> A;
 | 
			
		||||
 | 
			
		||||
    std::vector<siteVector,alignedAllocator<siteVector> >   comm_buf;
 | 
			
		||||
      
 | 
			
		||||
    ///////////////////////
 | 
			
		||||
    // Interface
 | 
			
		||||
@@ -187,7 +266,7 @@ namespace Grid {
 | 
			
		||||
      conformable(in._grid,out._grid);
 | 
			
		||||
 | 
			
		||||
      SimpleCompressor<siteVector> compressor;
 | 
			
		||||
      Stencil.HaloExchange(in,comm_buf,compressor);
 | 
			
		||||
      Stencil.HaloExchange(in,compressor);
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
      for(int ss=0;ss<Grid()->oSites();ss++){
 | 
			
		||||
@@ -204,7 +283,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
	  } else if(SE->_is_local) { 
 | 
			
		||||
	    nbr = in._odata[SE->_offset];
 | 
			
		||||
	  } else {
 | 
			
		||||
	    nbr = comm_buf[SE->_offset];
 | 
			
		||||
	    nbr = Stencil.comm_buf[SE->_offset];
 | 
			
		||||
	  }
 | 
			
		||||
	  res = res + A[point]._odata[ss]*nbr;
 | 
			
		||||
	}
 | 
			
		||||
@@ -228,7 +307,6 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
      Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
 | 
			
		||||
      A(geom.npoint,&CoarseGrid)
 | 
			
		||||
    {
 | 
			
		||||
      comm_buf.resize(Stencil._unified_buffer_size);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/LinearOperator.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_ALGORITHM_LINEAR_OP_H
 | 
			
		||||
#define  GRID_ALGORITHM_LINEAR_OP_H
 | 
			
		||||
 | 
			
		||||
@@ -194,6 +222,7 @@ namespace Grid {
 | 
			
		||||
      SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
      virtual  RealD Mpc      (const Field &in, Field &out) {
 | 
			
		||||
	Field tmp(in._grid);
 | 
			
		||||
//	std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << "  _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
 | 
			
		||||
 | 
			
		||||
	_Mat.Meooe(in,tmp);
 | 
			
		||||
	_Mat.MooeeInv(tmp,out);
 | 
			
		||||
@@ -223,10 +252,10 @@ namespace Grid {
 | 
			
		||||
      virtual  RealD Mpc      (const Field &in, Field &out) {
 | 
			
		||||
	Field tmp(in._grid);
 | 
			
		||||
 | 
			
		||||
	_Mat.Meooe(in,tmp);
 | 
			
		||||
	_Mat.MooeeInv(tmp,out);
 | 
			
		||||
	_Mat.Meooe(out,tmp);
 | 
			
		||||
	_Mat.MooeeInv(tmp,out);
 | 
			
		||||
	_Mat.Meooe(in,out);
 | 
			
		||||
	_Mat.MooeeInv(out,tmp);
 | 
			
		||||
	_Mat.Meooe(tmp,out);
 | 
			
		||||
	_Mat.MooeeInv(out,tmp);
 | 
			
		||||
 | 
			
		||||
	return axpy_norm(out,-1.0,tmp,in);
 | 
			
		||||
      }
 | 
			
		||||
@@ -242,6 +271,35 @@ namespace Grid {
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class Matrix,class Field>
 | 
			
		||||
      class SchurDiagTwoOperator :  public SchurOperatorBase<Field> {
 | 
			
		||||
    protected:
 | 
			
		||||
      Matrix &_Mat;
 | 
			
		||||
    public:
 | 
			
		||||
      SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
 | 
			
		||||
      virtual  RealD Mpc      (const Field &in, Field &out) {
 | 
			
		||||
	Field tmp(in._grid);
 | 
			
		||||
 | 
			
		||||
	_Mat.MooeeInv(in,out);
 | 
			
		||||
	_Mat.Meooe(out,tmp);
 | 
			
		||||
	_Mat.MooeeInv(tmp,out);
 | 
			
		||||
	_Mat.Meooe(out,tmp);
 | 
			
		||||
 | 
			
		||||
	return axpy_norm(out,-1.0,tmp,in);
 | 
			
		||||
      }
 | 
			
		||||
      virtual  RealD MpcDag   (const Field &in, Field &out){
 | 
			
		||||
	Field tmp(in._grid);
 | 
			
		||||
 | 
			
		||||
	_Mat.MeooeDag(in,out);
 | 
			
		||||
	_Mat.MooeeInvDag(out,tmp);
 | 
			
		||||
	_Mat.MeooeDag(tmp,out);
 | 
			
		||||
	_Mat.MooeeInvDag(out,tmp);
 | 
			
		||||
 | 
			
		||||
	return axpy_norm(out,-1.0,tmp,in);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////////////
 | 
			
		||||
    // Base classes for functions of operators
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/Preconditioner.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PRECONDITIONER_H
 | 
			
		||||
#define GRID_PRECONDITIONER_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/SparseMatrix.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef  GRID_ALGORITHM_SPARSE_MATRIX_H
 | 
			
		||||
#define  GRID_ALGORITHM_SPARSE_MATRIX_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/approx/Chebyshev.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CHEBYSHEV_H
 | 
			
		||||
#define GRID_CHEBYSHEV_H
 | 
			
		||||
 | 
			
		||||
@@ -30,13 +58,14 @@ namespace Grid {
 | 
			
		||||
      Field Mtmp(in._grid);
 | 
			
		||||
      AtoN = in;
 | 
			
		||||
      out = AtoN*Coeffs[0];
 | 
			
		||||
      //      std::cout <<"Poly in " <<norm2(in)<<std::endl;
 | 
			
		||||
      //      std::cout <<"0 " <<norm2(out)<<std::endl;
 | 
			
		||||
//            std::cout <<"Poly in " <<norm2(in)<<" size "<< Coeffs.size()<<std::endl;
 | 
			
		||||
//            std::cout <<"Coeffs[0]= "<<Coeffs[0]<< " 0 " <<norm2(out)<<std::endl;
 | 
			
		||||
      for(int n=1;n<Coeffs.size();n++){
 | 
			
		||||
	Mtmp = AtoN;
 | 
			
		||||
	Linop.HermOp(Mtmp,AtoN);
 | 
			
		||||
	out=out+AtoN*Coeffs[n];
 | 
			
		||||
	//	std::cout << n<<" " <<norm2(out)<<std::endl;
 | 
			
		||||
//            std::cout <<"Coeffs "<<n<<"= "<< Coeffs[n]<< " 0 " <<std::endl;
 | 
			
		||||
//		std::cout << n<<" " <<norm2(out)<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
  };
 | 
			
		||||
@@ -54,7 +83,8 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
  public:
 | 
			
		||||
    void csv(std::ostream &out){
 | 
			
		||||
      for (RealD x=lo; x<hi; x+=(hi-lo)/1000) {
 | 
			
		||||
	RealD diff = hi-lo;
 | 
			
		||||
      for (RealD x=lo-0.2*diff; x<hi+0.2*diff; x+=(hi-lo)/1000) {
 | 
			
		||||
	RealD f = approx(x);
 | 
			
		||||
	out<< x<<" "<<f<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
@@ -71,10 +101,24 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
    Chebyshev(){};
 | 
			
		||||
    Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
 | 
			
		||||
    
 | 
			
		||||
    Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// CJ: the one we need for Lanczos
 | 
			
		||||
    void Init(RealD _lo,RealD _hi,int _order)
 | 
			
		||||
    {
 | 
			
		||||
      lo=_lo;
 | 
			
		||||
      hi=_hi;
 | 
			
		||||
      order=_order;
 | 
			
		||||
      
 | 
			
		||||
      if(order < 2) exit(-1);
 | 
			
		||||
      Coeffs.resize(order);
 | 
			
		||||
      Coeffs.assign(0.,order);
 | 
			
		||||
      Coeffs[order-1] = 1.;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
 | 
			
		||||
    {
 | 
			
		||||
      lo=_lo;
 | 
			
		||||
@@ -154,6 +198,8 @@ namespace Grid {
 | 
			
		||||
    void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
 | 
			
		||||
 | 
			
		||||
      GridBase *grid=in._grid;
 | 
			
		||||
//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
 | 
			
		||||
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
 | 
			
		||||
 | 
			
		||||
      int vol=grid->gSites();
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/approx/MultiShiftFunction.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/approx/MultiShiftFunction.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef MULTI_SHIFT_FUNCTION
 | 
			
		||||
#define MULTI_SHIFT_FUNCTION
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -16,9 +16,13 @@
 | 
			
		||||
#define INCLUDED_ALG_REMEZ_H
 | 
			
		||||
 | 
			
		||||
#include <stddef.h>
 | 
			
		||||
#include <Config.h>
 | 
			
		||||
 | 
			
		||||
//#include <algorithms/approx/bigfloat.h>
 | 
			
		||||
#ifdef HAVE_GMP_H
 | 
			
		||||
#include <algorithms/approx/bigfloat.h>
 | 
			
		||||
#else
 | 
			
		||||
#include <algorithms/approx/bigfloat_double.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
 | 
			
		||||
#define SUM_MAX 10 // Maximum number of terms in exponential
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/approx/bigfloat_double.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <math.h>
 | 
			
		||||
 | 
			
		||||
typedef double mfloat; 
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/AdefGeneric.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
			
		||||
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ConjugateGradient.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CONJUGATE_GRADIENT_H
 | 
			
		||||
#define GRID_CONJUGATE_GRADIENT_H
 | 
			
		||||
 | 
			
		||||
@@ -30,7 +59,8 @@ public:
 | 
			
		||||
      
 | 
			
		||||
      //Initial residual computation & set up
 | 
			
		||||
      RealD guess = norm2(psi);
 | 
			
		||||
      
 | 
			
		||||
      assert(std::isnan(guess)==0);
 | 
			
		||||
 | 
			
		||||
      Linop.HermOpAndNorm(psi,mmp,d,b);
 | 
			
		||||
      
 | 
			
		||||
      r= src-mmp;
 | 
			
		||||
@@ -54,17 +84,25 @@ public:
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
 | 
			
		||||
      
 | 
			
		||||
      std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
 | 
			
		||||
 | 
			
		||||
      GridStopWatch LinalgTimer;
 | 
			
		||||
      GridStopWatch MatrixTimer;
 | 
			
		||||
      GridStopWatch SolverTimer;
 | 
			
		||||
 | 
			
		||||
      SolverTimer.Start();
 | 
			
		||||
      int k;
 | 
			
		||||
      for (k=1;k<=MaxIterations;k++){
 | 
			
		||||
	
 | 
			
		||||
	c=cp;
 | 
			
		||||
	
 | 
			
		||||
	Linop.HermOpAndNorm(p,mmp,d,qq);
 | 
			
		||||
 | 
			
		||||
	RealD    qqck = norm2(mmp);
 | 
			
		||||
	ComplexD dck  = innerProduct(p,mmp);
 | 
			
		||||
	MatrixTimer.Start();
 | 
			
		||||
	Linop.HermOpAndNorm(p,mmp,d,qq);
 | 
			
		||||
	MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
	LinalgTimer.Start();
 | 
			
		||||
	//	RealD    qqck = norm2(mmp);
 | 
			
		||||
	//	ComplexD dck  = innerProduct(p,mmp);
 | 
			
		||||
      
 | 
			
		||||
	a      = c/d;
 | 
			
		||||
	b_pred = a*(a*qq-d)/c;
 | 
			
		||||
@@ -76,11 +114,13 @@ public:
 | 
			
		||||
	psi= a*p+psi;
 | 
			
		||||
	p  = p*b+r;
 | 
			
		||||
	  
 | 
			
		||||
	std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
 | 
			
		||||
	LinalgTimer.Stop();
 | 
			
		||||
	std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
 | 
			
		||||
	
 | 
			
		||||
	// Stopping condition
 | 
			
		||||
	if ( cp <= rsq ) { 
 | 
			
		||||
	  
 | 
			
		||||
	  SolverTimer.Stop();
 | 
			
		||||
	  Linop.HermOpAndNorm(psi,mmp,d,qq);
 | 
			
		||||
	  p=mmp-src;
 | 
			
		||||
	  
 | 
			
		||||
@@ -92,8 +132,13 @@ public:
 | 
			
		||||
 | 
			
		||||
	  std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
 | 
			
		||||
		   <<" computed residual "<<sqrt(cp/ssq)
 | 
			
		||||
		   <<" true residual     "<<true_residual
 | 
			
		||||
		   <<" true residual "    <<true_residual
 | 
			
		||||
		   <<" target "<<Tolerance<<std::endl;
 | 
			
		||||
	  std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix  "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
 | 
			
		||||
	  std::cout<<std::endl;
 | 
			
		||||
	  
 | 
			
		||||
	  assert(true_residual/Tolerance < 1000.0);
 | 
			
		||||
 | 
			
		||||
	  return;
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
 | 
			
		||||
#define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
 | 
			
		||||
 | 
			
		||||
@@ -215,6 +243,8 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
      if ( (!converged[s]) ){
 | 
			
		||||
	
 | 
			
		||||
	RealD css  = c * z[s][iz]* z[s][iz];
 | 
			
		||||
    if((k%100)==0 && (s==0) )
 | 
			
		||||
	    std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" css " <<css <<std::endl;
 | 
			
		||||
	
 | 
			
		||||
	if(css<rsq[s]){
 | 
			
		||||
	  if ( ! converged[s] )
 | 
			
		||||
@@ -246,7 +276,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
 | 
			
		||||
  }
 | 
			
		||||
  // ugly hack
 | 
			
		||||
  std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
 | 
			
		||||
  assert(0);
 | 
			
		||||
//  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ConjugateResidual.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CONJUGATE_RESIDUAL_H
 | 
			
		||||
#define GRID_CONJUGATE_RESIDUAL_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/DenseMatrix.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_DENSE_MATRIX_H
 | 
			
		||||
#define GRID_DENSE_MATRIX_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/EigenSort.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_EIGENSORT_H
 | 
			
		||||
#define GRID_EIGENSORT_H
 | 
			
		||||
 | 
			
		||||
@@ -11,32 +38,34 @@ template<class Field>
 | 
			
		||||
class SortEigen {
 | 
			
		||||
 private:
 | 
			
		||||
  
 | 
			
		||||
//hacking for testing for now
 | 
			
		||||
 private:
 | 
			
		||||
  static bool less_lmd(RealD left,RealD right){
 | 
			
		||||
    return fabs(left) < fabs(right);
 | 
			
		||||
    return left > right;
 | 
			
		||||
  }  
 | 
			
		||||
  static bool less_pair(std::pair<RealD,Field>& left,
 | 
			
		||||
		 std::pair<RealD,Field>& right){
 | 
			
		||||
    return fabs(left.first) < fabs(right.first);
 | 
			
		||||
  static bool less_pair(std::pair<RealD,Field const*>& left,
 | 
			
		||||
                        std::pair<RealD,Field const*>& right){
 | 
			
		||||
    return left.first > (right.first);
 | 
			
		||||
  }  
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  void push(DenseVector<RealD>& lmd,
 | 
			
		||||
	    DenseVector<Field>& evec,int N) {
 | 
			
		||||
 | 
			
		||||
    DenseVector<std::pair<RealD, Field> > emod;
 | 
			
		||||
    typename DenseVector<std::pair<RealD, Field> >::iterator it;
 | 
			
		||||
            DenseVector<Field>& evec,int N) {
 | 
			
		||||
    DenseVector<Field> cpy(lmd.size(),evec[0]._grid);
 | 
			
		||||
    for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
 | 
			
		||||
    
 | 
			
		||||
    for(int i=0;i<lmd.size();++i){
 | 
			
		||||
      emod.push_back(std::pair<RealD,Field>(lmd[i],evec[i]));
 | 
			
		||||
    }
 | 
			
		||||
    DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());    
 | 
			
		||||
    for(int i=0;i<lmd.size();++i)
 | 
			
		||||
      emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
 | 
			
		||||
 | 
			
		||||
    partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
 | 
			
		||||
 | 
			
		||||
    it=emod.begin();
 | 
			
		||||
    typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
 | 
			
		||||
    for(int i=0;i<N;++i){
 | 
			
		||||
      lmd[i]=it->first;
 | 
			
		||||
      evec[i]=it->second;
 | 
			
		||||
      evec[i]=*(it->second);
 | 
			
		||||
      ++it;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/Francis.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef FRANCIS_H
 | 
			
		||||
#define FRANCIS_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/Householder.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef HOUSEHOLDER_H
 | 
			
		||||
#define HOUSEHOLDER_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,38 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_IRL_H
 | 
			
		||||
#define GRID_IRL_H
 | 
			
		||||
 | 
			
		||||
#include <string.h> //memset
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
#include <lapacke.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include <algorithms/iterative/DenseMatrix.h>
 | 
			
		||||
#include <algorithms/iterative/EigenSort.h>
 | 
			
		||||
 | 
			
		||||
@@ -21,6 +53,7 @@ public:
 | 
			
		||||
    int Niter;
 | 
			
		||||
    int converged;
 | 
			
		||||
 | 
			
		||||
    int Nstop;   // Number of evecs checked for convergence
 | 
			
		||||
    int Nk;      // Number of converged sought
 | 
			
		||||
    int Np;      // Np -- Number of spare vecs in kryloc space
 | 
			
		||||
    int Nm;      // Nm -- total number of vectors
 | 
			
		||||
@@ -29,6 +62,8 @@ public:
 | 
			
		||||
 | 
			
		||||
    SortEigen<Field> _sort;
 | 
			
		||||
 | 
			
		||||
//    GridCartesian &_fgrid;
 | 
			
		||||
 | 
			
		||||
    LinearOperatorBase<Field> &_Linop;
 | 
			
		||||
 | 
			
		||||
    OperatorFunction<Field>   &_poly;
 | 
			
		||||
@@ -39,7 +74,27 @@ public:
 | 
			
		||||
    void init(void){};
 | 
			
		||||
    void Abort(int ff, DenseVector<RealD> &evals,  DenseVector<DenseVector<RealD> > &evecs);
 | 
			
		||||
 | 
			
		||||
    ImplicitlyRestartedLanczos(LinearOperatorBase<Field> &Linop, // op
 | 
			
		||||
    ImplicitlyRestartedLanczos(
 | 
			
		||||
				LinearOperatorBase<Field> &Linop, // op
 | 
			
		||||
			       OperatorFunction<Field> & poly,   // polynmial
 | 
			
		||||
			       int _Nstop, // sought vecs
 | 
			
		||||
			       int _Nk, // sought vecs
 | 
			
		||||
			       int _Nm, // spare vecs
 | 
			
		||||
			       RealD _eresid, // resid in lmdue deficit 
 | 
			
		||||
			       int _Niter) : // Max iterations
 | 
			
		||||
      _Linop(Linop),
 | 
			
		||||
      _poly(poly),
 | 
			
		||||
      Nstop(_Nstop),
 | 
			
		||||
      Nk(_Nk),
 | 
			
		||||
      Nm(_Nm),
 | 
			
		||||
      eresid(_eresid),
 | 
			
		||||
      Niter(_Niter)
 | 
			
		||||
    { 
 | 
			
		||||
      Np = Nm-Nk; assert(Np>0);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    ImplicitlyRestartedLanczos(
 | 
			
		||||
				LinearOperatorBase<Field> &Linop, // op
 | 
			
		||||
			       OperatorFunction<Field> & poly,   // polynmial
 | 
			
		||||
			       int _Nk, // sought vecs
 | 
			
		||||
			       int _Nm, // spare vecs
 | 
			
		||||
@@ -47,6 +102,7 @@ public:
 | 
			
		||||
			       int _Niter) : // Max iterations
 | 
			
		||||
      _Linop(Linop),
 | 
			
		||||
      _poly(poly),
 | 
			
		||||
      Nstop(_Nk),
 | 
			
		||||
      Nk(_Nk),
 | 
			
		||||
      Nm(_Nm),
 | 
			
		||||
      eresid(_eresid),
 | 
			
		||||
@@ -114,10 +170,11 @@ public:
 | 
			
		||||
      RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
 | 
			
		||||
                                 // 7. vk+1 := wk/βk+1
 | 
			
		||||
 | 
			
		||||
//	std::cout << "alpha = " << zalph << " beta "<<beta<<std::endl;
 | 
			
		||||
      const RealD tiny = 1.0e-20;
 | 
			
		||||
      if ( beta < tiny ) { 
 | 
			
		||||
	std::cout << " beta is tiny "<<beta<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
     }
 | 
			
		||||
      lmd[k] = alph;
 | 
			
		||||
      lme[k]  = beta;
 | 
			
		||||
 | 
			
		||||
@@ -191,15 +248,122 @@ public:
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
    void diagonalize_lapack(DenseVector<RealD>& lmd,
 | 
			
		||||
		     DenseVector<RealD>& lme, 
 | 
			
		||||
		     int N1,
 | 
			
		||||
		     int N2,
 | 
			
		||||
		     DenseVector<RealD>& Qt,
 | 
			
		||||
		     GridBase *grid){
 | 
			
		||||
  const int size = Nm;
 | 
			
		||||
//  tevals.resize(size);
 | 
			
		||||
//  tevecs.resize(size);
 | 
			
		||||
  int NN = N1;
 | 
			
		||||
  double evals_tmp[NN];
 | 
			
		||||
  double evec_tmp[NN][NN];
 | 
			
		||||
  memset(evec_tmp[0],0,sizeof(double)*NN*NN);
 | 
			
		||||
//  double AA[NN][NN];
 | 
			
		||||
  double DD[NN];
 | 
			
		||||
  double EE[NN];
 | 
			
		||||
  for (int i = 0; i< NN; i++)
 | 
			
		||||
    for (int j = i - 1; j <= i + 1; j++)
 | 
			
		||||
      if ( j < NN && j >= 0 ) {
 | 
			
		||||
        if (i==j) DD[i] = lmd[i];
 | 
			
		||||
        if (i==j) evals_tmp[i] = lmd[i];
 | 
			
		||||
        if (j==(i-1)) EE[j] = lme[j];
 | 
			
		||||
      }
 | 
			
		||||
  int evals_found;
 | 
			
		||||
  int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
 | 
			
		||||
  int liwork =  3+NN*10 ;
 | 
			
		||||
  int iwork[liwork];
 | 
			
		||||
  double work[lwork];
 | 
			
		||||
  int isuppz[2*NN];
 | 
			
		||||
  char jobz = 'V'; // calculate evals & evecs
 | 
			
		||||
  char range = 'I'; // calculate all evals
 | 
			
		||||
  //    char range = 'A'; // calculate all evals
 | 
			
		||||
  char uplo = 'U'; // refer to upper half of original matrix
 | 
			
		||||
  char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
 | 
			
		||||
  int ifail[NN];
 | 
			
		||||
  int info;
 | 
			
		||||
//  int total = QMP_get_number_of_nodes();
 | 
			
		||||
//  int node = QMP_get_node_number();
 | 
			
		||||
//  GridBase *grid = evec[0]._grid;
 | 
			
		||||
  int total = grid->_Nprocessors;
 | 
			
		||||
  int node = grid->_processor;
 | 
			
		||||
  int interval = (NN/total)+1;
 | 
			
		||||
  double vl = 0.0, vu = 0.0;
 | 
			
		||||
  int il = interval*node+1 , iu = interval*(node+1);
 | 
			
		||||
  if (iu > NN)  iu=NN;
 | 
			
		||||
  double tol = 0.0;
 | 
			
		||||
    if (1) {
 | 
			
		||||
      memset(evals_tmp,0,sizeof(double)*NN);
 | 
			
		||||
      if ( il <= NN){
 | 
			
		||||
        printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu);
 | 
			
		||||
        LAPACK_dstegr(&jobz, &range, &NN,
 | 
			
		||||
            (double*)DD, (double*)EE,
 | 
			
		||||
            &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
 | 
			
		||||
            &tol, // tolerance
 | 
			
		||||
            &evals_found, evals_tmp, (double*)evec_tmp, &NN,
 | 
			
		||||
            isuppz,
 | 
			
		||||
            work, &lwork, iwork, &liwork,
 | 
			
		||||
            &info);
 | 
			
		||||
        for (int i = iu-1; i>= il-1; i--){
 | 
			
		||||
          printf("node=%d evals_found=%d evals_tmp[%d] = %g\n",node,evals_found, i - (il-1),evals_tmp[i - (il-1)]);
 | 
			
		||||
          evals_tmp[i] = evals_tmp[i - (il-1)];
 | 
			
		||||
          if (il>1) evals_tmp[i-(il-1)]=0.;
 | 
			
		||||
          for (int j = 0; j< NN; j++){
 | 
			
		||||
            evec_tmp[i][j] = evec_tmp[i - (il-1)][j];
 | 
			
		||||
            if (il>1) evec_tmp[i-(il-1)][j]=0.;
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      {
 | 
			
		||||
//        QMP_sum_double_array(evals_tmp,NN);
 | 
			
		||||
//        QMP_sum_double_array((double *)evec_tmp,NN*NN);
 | 
			
		||||
         grid->GlobalSumVector(evals_tmp,NN);
 | 
			
		||||
         grid->GlobalSumVector((double*)evec_tmp,NN*NN);
 | 
			
		||||
      }
 | 
			
		||||
    } 
 | 
			
		||||
// cheating a bit. It is better to sort instead of just reversing it, but the document of the routine says evals are sorted in increasing order. qr gives evals in decreasing order.
 | 
			
		||||
  for(int i=0;i<NN;i++){
 | 
			
		||||
    for(int j=0;j<NN;j++)
 | 
			
		||||
      Qt[(NN-1-i)*N2+j]=evec_tmp[i][j];
 | 
			
		||||
      lmd [NN-1-i]=evals_tmp[i];
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    void diagonalize(DenseVector<RealD>& lmd,
 | 
			
		||||
		     DenseVector<RealD>& lme, 
 | 
			
		||||
		     int Nm2,
 | 
			
		||||
		     int Nm,
 | 
			
		||||
		     DenseVector<RealD>& Qt)
 | 
			
		||||
		     int N2,
 | 
			
		||||
		     int N1,
 | 
			
		||||
		     DenseVector<RealD>& Qt,
 | 
			
		||||
		     GridBase *grid)
 | 
			
		||||
    {
 | 
			
		||||
      int Niter = 100*Nm;
 | 
			
		||||
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
    const int check_lapack=0; // just use lapack if 0, check against lapack if 1
 | 
			
		||||
 | 
			
		||||
    if(!check_lapack)
 | 
			
		||||
	return diagonalize_lapack(lmd,lme,N2,N1,Qt,grid);
 | 
			
		||||
 | 
			
		||||
	DenseVector <RealD> lmd2(N1);
 | 
			
		||||
	DenseVector <RealD> lme2(N1);
 | 
			
		||||
	DenseVector<RealD> Qt2(N1*N1);
 | 
			
		||||
         for(int k=0; k<N1; ++k){
 | 
			
		||||
	    lmd2[k] = lmd[k];
 | 
			
		||||
	    lme2[k] = lme[k];
 | 
			
		||||
	  }
 | 
			
		||||
         for(int k=0; k<N1*N1; ++k)
 | 
			
		||||
	Qt2[k] = Qt[k];
 | 
			
		||||
 | 
			
		||||
//	diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
      int Niter = 100*N1;
 | 
			
		||||
      int kmin = 1;
 | 
			
		||||
      int kmax = Nk;
 | 
			
		||||
      int kmax = N2;
 | 
			
		||||
      // (this should be more sophisticated)
 | 
			
		||||
 | 
			
		||||
      for(int iter=0; iter<Niter; ++iter){
 | 
			
		||||
@@ -211,7 +375,7 @@ public:
 | 
			
		||||
	// (Dsh: shift)
 | 
			
		||||
	
 | 
			
		||||
	// transformation
 | 
			
		||||
	qr_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax);
 | 
			
		||||
	qr_decomp(lmd,lme,N2,N1,Qt,Dsh,kmin,kmax);
 | 
			
		||||
	
 | 
			
		||||
	// Convergence criterion (redef of kmin and kamx)
 | 
			
		||||
	for(int j=kmax-1; j>= kmin; --j){
 | 
			
		||||
@@ -222,6 +386,23 @@ public:
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	Niter = iter;
 | 
			
		||||
#ifdef USE_LAPACK
 | 
			
		||||
    if(check_lapack){
 | 
			
		||||
	const double SMALL=1e-8;
 | 
			
		||||
	diagonalize_lapack(lmd2,lme2,N2,N1,Qt2,grid);
 | 
			
		||||
	DenseVector <RealD> lmd3(N2);
 | 
			
		||||
         for(int k=0; k<N2; ++k) lmd3[k]=lmd[k];
 | 
			
		||||
        _sort.push(lmd3,N2);
 | 
			
		||||
        _sort.push(lmd2,N2);
 | 
			
		||||
         for(int k=0; k<N2; ++k){
 | 
			
		||||
	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
 | 
			
		||||
//	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
 | 
			
		||||
	  }
 | 
			
		||||
         for(int k=0; k<N1*N1; ++k){
 | 
			
		||||
//	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
 | 
			
		||||
	}
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
	return;
 | 
			
		||||
 | 
			
		||||
      continued:
 | 
			
		||||
@@ -237,6 +418,7 @@ public:
 | 
			
		||||
      abort();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#if 1
 | 
			
		||||
    static RealD normalise(Field& v) 
 | 
			
		||||
    {
 | 
			
		||||
      RealD nn = norm2(v);
 | 
			
		||||
@@ -298,6 +480,7 @@ until convergence
 | 
			
		||||
      {
 | 
			
		||||
 | 
			
		||||
	GridBase *grid = evec[0]._grid;
 | 
			
		||||
	assert(grid == src._grid);
 | 
			
		||||
 | 
			
		||||
	std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
 | 
			
		||||
	std::cout << " -- Nm = " << Nm << std::endl;
 | 
			
		||||
@@ -328,11 +511,21 @@ until convergence
 | 
			
		||||
	// (uniform vector) Why not src??
 | 
			
		||||
	//	evec[0] = 1.0;
 | 
			
		||||
	evec[0] = src;
 | 
			
		||||
	std:: cout <<"norm2(src)= " << norm2(src)<<std::endl;
 | 
			
		||||
// << src._grid  << std::endl;
 | 
			
		||||
	normalise(evec[0]);
 | 
			
		||||
	std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
 | 
			
		||||
// << evec[0]._grid << std::endl;
 | 
			
		||||
	
 | 
			
		||||
	// Initial Nk steps
 | 
			
		||||
	for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
 | 
			
		||||
//	std:: cout <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl;
 | 
			
		||||
//	std:: cout <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl;
 | 
			
		||||
	RitzMatrix(evec,Nk);
 | 
			
		||||
	for(int k=0; k<Nk; ++k){
 | 
			
		||||
//	std:: cout <<"eval " << k << " " <<eval[k] << std::endl;
 | 
			
		||||
//	std:: cout <<"lme " << k << " " << lme[k] << std::endl;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Restarting loop begins
 | 
			
		||||
	for(int iter = 0; iter<Niter; ++iter){
 | 
			
		||||
@@ -354,20 +547,24 @@ until convergence
 | 
			
		||||
	    lme2[k] = lme[k+k1-1];
 | 
			
		||||
	  }
 | 
			
		||||
	  setUnit_Qt(Nm,Qt);
 | 
			
		||||
	  diagonalize(eval2,lme2,Nm,Nm,Qt);
 | 
			
		||||
	  diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
 | 
			
		||||
 | 
			
		||||
	  // sorting
 | 
			
		||||
	  _sort.push(eval2,Nm);
 | 
			
		||||
	  
 | 
			
		||||
	  // Implicitly shifted QR transformations
 | 
			
		||||
	  setUnit_Qt(Nm,Qt);
 | 
			
		||||
	  for(int ip=k2; ip<Nm; ++ip) 
 | 
			
		||||
	  for(int ip=k2; ip<Nm; ++ip){ 
 | 
			
		||||
	std::cout << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
 | 
			
		||||
	    qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
 | 
			
		||||
		
 | 
			
		||||
	}
 | 
			
		||||
    
 | 
			
		||||
	  for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
 | 
			
		||||
	  
 | 
			
		||||
	  for(int j=k1-1; j<k2+1; ++j){
 | 
			
		||||
	    for(int k=0; k<Nm; ++k){
 | 
			
		||||
	    B[j].checkerboard = evec[k].checkerboard;
 | 
			
		||||
	      B[j] += Qt[k+Nm*j] * evec[k];
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
@@ -390,21 +587,25 @@ until convergence
 | 
			
		||||
	    lme2[k] = lme[k];
 | 
			
		||||
	  }
 | 
			
		||||
	  setUnit_Qt(Nm,Qt);
 | 
			
		||||
	  diagonalize(eval2,lme2,Nk,Nm,Qt);
 | 
			
		||||
	  diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
 | 
			
		||||
	  
 | 
			
		||||
	  for(int k = 0; k<Nk; ++k) B[k]=0.0;
 | 
			
		||||
	  
 | 
			
		||||
	  for(int j = 0; j<Nk; ++j){
 | 
			
		||||
	    for(int k = 0; k<Nk; ++k){
 | 
			
		||||
	    B[j].checkerboard = evec[k].checkerboard;
 | 
			
		||||
	      B[j] += Qt[k+j*Nm] * evec[k];
 | 
			
		||||
	    }
 | 
			
		||||
//	    std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
 | 
			
		||||
	  }
 | 
			
		||||
//	_sort.push(eval2,B,Nk);
 | 
			
		||||
 | 
			
		||||
	  Nconv = 0;
 | 
			
		||||
	  //	  std::cout << std::setiosflags(std::ios_base::scientific);
 | 
			
		||||
	  for(int i=0; i<Nk; ++i){
 | 
			
		||||
 | 
			
		||||
	    _poly(_Linop,B[i],v);
 | 
			
		||||
//	    _poly(_Linop,B[i],v);
 | 
			
		||||
	    _Linop.HermOp(B[i],v);
 | 
			
		||||
	    
 | 
			
		||||
	    RealD vnum = real(innerProduct(B[i],v)); // HermOp.
 | 
			
		||||
	    RealD vden = norm2(B[i]);
 | 
			
		||||
@@ -412,11 +613,13 @@ until convergence
 | 
			
		||||
	    v -= eval2[i]*B[i];
 | 
			
		||||
	    RealD vv = norm2(v);
 | 
			
		||||
	    
 | 
			
		||||
	    std::cout.precision(13);
 | 
			
		||||
	    std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
 | 
			
		||||
	    std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
 | 
			
		||||
	    std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
 | 
			
		||||
	    
 | 
			
		||||
	    if(vv<eresid*eresid){
 | 
			
		||||
	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
 | 
			
		||||
	    if((vv<eresid*eresid) && (i == Nconv) ){
 | 
			
		||||
	      Iconv[Nconv] = i;
 | 
			
		||||
	      ++Nconv;
 | 
			
		||||
	    }
 | 
			
		||||
@@ -427,7 +630,7 @@ until convergence
 | 
			
		||||
 | 
			
		||||
	  std::cout<<" #modes converged: "<<Nconv<<std::endl;
 | 
			
		||||
 | 
			
		||||
	  if( Nconv>=Nk ){
 | 
			
		||||
	  if( Nconv>=Nstop ){
 | 
			
		||||
	    goto converged;
 | 
			
		||||
	  }
 | 
			
		||||
	} // end of iter loop
 | 
			
		||||
@@ -436,21 +639,20 @@ until convergence
 | 
			
		||||
	abort();
 | 
			
		||||
	
 | 
			
		||||
      converged:
 | 
			
		||||
	// Sorting
 | 
			
		||||
	
 | 
			
		||||
	eval.clear();
 | 
			
		||||
	evec.clear();
 | 
			
		||||
	for(int i=0; i<Nconv; ++i){
 | 
			
		||||
	  eval.push_back(eval2[Iconv[i]]);
 | 
			
		||||
	  evec.push_back(B[Iconv[i]]);
 | 
			
		||||
	}
 | 
			
		||||
	_sort.push(eval,evec,Nconv);
 | 
			
		||||
	
 | 
			
		||||
	std::cout << "\n Converged\n Summary :\n";
 | 
			
		||||
	std::cout << " -- Iterations  = "<< Nconv  << "\n";
 | 
			
		||||
	std::cout << " -- beta(k)     = "<< beta_k << "\n";
 | 
			
		||||
	std::cout << " -- Nconv       = "<< Nconv  << "\n";
 | 
			
		||||
      }
 | 
			
		||||
       // Sorting
 | 
			
		||||
       eval.resize(Nconv);
 | 
			
		||||
       evec.resize(Nconv,grid);
 | 
			
		||||
       for(int i=0; i<Nconv; ++i){
 | 
			
		||||
         eval[i] = eval2[Iconv[i]];
 | 
			
		||||
         evec[i] = B[Iconv[i]];
 | 
			
		||||
       }
 | 
			
		||||
      _sort.push(eval,evec,Nconv);
 | 
			
		||||
 | 
			
		||||
      std::cout << "\n Converged\n Summary :\n";
 | 
			
		||||
      std::cout << " -- Iterations  = "<< Nconv  << "\n";
 | 
			
		||||
      std::cout << " -- beta(k)     = "<< beta_k << "\n";
 | 
			
		||||
      std::cout << " -- Nconv       = "<< Nconv  << "\n";
 | 
			
		||||
     }
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////
 | 
			
		||||
    // Adapted from Rudy's lanczos factor routine
 | 
			
		||||
@@ -997,6 +1199,7 @@ static void Lock(DenseMatrix<T> &H, 	///Hess mtx
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/Matrix.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef MATRIX_H
 | 
			
		||||
#define MATRIX_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/MatrixUtils.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_MATRIX_UTILS_H
 | 
			
		||||
#define GRID_MATRIX_UTILS_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/NormalEquations.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_NORMAL_EQUATIONS_H
 | 
			
		||||
#define GRID_NORMAL_EQUATIONS_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/PrecConjugateResidual.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
 | 
			
		||||
#define GRID_PREC_CONJUGATE_RESIDUAL_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PREC_GCR_H
 | 
			
		||||
#define GRID_PREC_GCR_H
 | 
			
		||||
 | 
			
		||||
@@ -19,6 +47,10 @@ namespace Grid {
 | 
			
		||||
    int mmax;
 | 
			
		||||
    int nstep;
 | 
			
		||||
    int steps;
 | 
			
		||||
    GridStopWatch PrecTimer;
 | 
			
		||||
    GridStopWatch MatTimer;
 | 
			
		||||
    GridStopWatch LinalgTimer;
 | 
			
		||||
 | 
			
		||||
    LinearFunction<Field> &Preconditioner;
 | 
			
		||||
 | 
			
		||||
   PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) : 
 | 
			
		||||
@@ -40,14 +72,24 @@ namespace Grid {
 | 
			
		||||
      
 | 
			
		||||
      Field r(src._grid);
 | 
			
		||||
 | 
			
		||||
        PrecTimer.Reset();
 | 
			
		||||
         MatTimer.Reset();
 | 
			
		||||
      LinalgTimer.Reset();
 | 
			
		||||
 | 
			
		||||
      GridStopWatch SolverTimer;
 | 
			
		||||
      SolverTimer.Start();
 | 
			
		||||
 | 
			
		||||
      steps=0;
 | 
			
		||||
      for(int k=0;k<MaxIterations;k++){
 | 
			
		||||
 | 
			
		||||
	cp=GCRnStep(Linop,src,psi,rsq);
 | 
			
		||||
 | 
			
		||||
	if ( verbose ) std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
 | 
			
		||||
 | 
			
		||||
	if(cp<rsq) {
 | 
			
		||||
 | 
			
		||||
	  SolverTimer.Stop();
 | 
			
		||||
 | 
			
		||||
	  Linop.HermOp(psi,r);
 | 
			
		||||
	  axpy(r,-1.0,src,r);
 | 
			
		||||
	  RealD tr = norm2(r);
 | 
			
		||||
@@ -55,6 +97,11 @@ namespace Grid {
 | 
			
		||||
		   << " computed residual "<<sqrt(cp/ssq)
 | 
			
		||||
	           << " true residual "    <<sqrt(tr/ssq)
 | 
			
		||||
	           << " target "           <<Tolerance <<std::endl;
 | 
			
		||||
 | 
			
		||||
	  std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total  "<< SolverTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<<   PrecTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<<    MatTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
 | 
			
		||||
	  return;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@@ -62,6 +109,7 @@ namespace Grid {
 | 
			
		||||
      std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
 | 
			
		||||
      assert(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
 | 
			
		||||
 | 
			
		||||
      RealD cp;
 | 
			
		||||
@@ -88,24 +136,25 @@ namespace Grid {
 | 
			
		||||
      // initial guess x0 is taken as nonzero.
 | 
			
		||||
      // r0=src-A x0 = src
 | 
			
		||||
      //////////////////////////////////
 | 
			
		||||
      MatTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(psi,Az,zAz,zAAz); 
 | 
			
		||||
      MatTimer.Stop();
 | 
			
		||||
      r=src-Az;
 | 
			
		||||
      
 | 
			
		||||
      /////////////////////
 | 
			
		||||
      // p = Prec(r)
 | 
			
		||||
      /////////////////////
 | 
			
		||||
      PrecTimer.Start();
 | 
			
		||||
      Preconditioner(r,z);
 | 
			
		||||
      PrecTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< " Preconditioner in " << norm2(r)<<std::endl; 
 | 
			
		||||
      std::cout<<GridLogMessage<< " Preconditioner out " << norm2(z)<<std::endl; 
 | 
			
		||||
      
 | 
			
		||||
      MatTimer.Start();
 | 
			
		||||
      Linop.HermOp(z,tmp); 
 | 
			
		||||
      MatTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< " Preconditioner Aout " << norm2(tmp)<<std::endl; 
 | 
			
		||||
      ttmp=tmp;
 | 
			
		||||
      tmp=tmp-r;
 | 
			
		||||
 | 
			
		||||
      std::cout<<GridLogMessage<< " Preconditioner resid " << std::sqrt(norm2(tmp)/norm2(r))<<std::endl; 
 | 
			
		||||
      /*
 | 
			
		||||
      std::cout<<GridLogMessage<<r<<std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<<z<<std::endl;
 | 
			
		||||
@@ -113,7 +162,9 @@ namespace Grid {
 | 
			
		||||
      std::cout<<GridLogMessage<<tmp<<std::endl;
 | 
			
		||||
      */
 | 
			
		||||
 | 
			
		||||
      MatTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(z,Az,zAz,zAAz); 
 | 
			
		||||
      MatTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      //p[0],q[0],qq[0] 
 | 
			
		||||
      p[0]= z;
 | 
			
		||||
@@ -137,18 +188,22 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
	cp = axpy_norm(r,-a,q[peri_k],r);  
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage<< " VPGCR_step resid" <<sqrt(cp/rsq)<<std::endl; 
 | 
			
		||||
	if((k==nstep-1)||(cp<rsq)){
 | 
			
		||||
	  return cp;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"]  resid " <<sqrt(cp/rsq)<<std::endl; 
 | 
			
		||||
 | 
			
		||||
	PrecTimer.Start();
 | 
			
		||||
	Preconditioner(r,z);// solve Az = r
 | 
			
		||||
	PrecTimer.Stop();
 | 
			
		||||
 | 
			
		||||
	MatTimer.Start();
 | 
			
		||||
	Linop.HermOpAndNorm(z,Az,zAz,zAAz);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	Linop.HermOp(z,tmp);
 | 
			
		||||
	MatTimer.Stop();
 | 
			
		||||
        tmp=tmp-r;
 | 
			
		||||
	std::cout<<GridLogMessage<< " Preconditioner resid" <<sqrt(norm2(tmp)/norm2(r))<<std::endl; 
 | 
			
		||||
	std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl; 
 | 
			
		||||
 | 
			
		||||
	q[peri_kp]=Az;
 | 
			
		||||
	p[peri_kp]=z;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/algorithms/iterative/SchurRedBlack.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_SCHUR_RED_BLACK_H
 | 
			
		||||
#define GRID_SCHUR_RED_BLACK_H
 | 
			
		||||
 | 
			
		||||
@@ -75,7 +102,9 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,src_e,in);
 | 
			
		||||
      pickCheckerboard(Odd ,src_o,in);
 | 
			
		||||
 | 
			
		||||
      pickCheckerboard(Even,sol_e,out);
 | 
			
		||||
      pickCheckerboard(Odd ,sol_o,out);
 | 
			
		||||
    
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
 | 
			
		||||
      /////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cartesian/Cartesian_base.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_BASE_H
 | 
			
		||||
#define GRID_CARTESIAN_BASE_H
 | 
			
		||||
 | 
			
		||||
@@ -87,27 +115,11 @@ public:
 | 
			
		||||
      for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d];
 | 
			
		||||
      return idx;
 | 
			
		||||
    }
 | 
			
		||||
    static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
 | 
			
		||||
      int nd= dims.size();
 | 
			
		||||
      coor.resize(nd);
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	coor[d] = index % dims[d];
 | 
			
		||||
	index   = index / dims[d];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
 | 
			
		||||
      CoorFromIndex(coor,Oindex,_rdimensions);
 | 
			
		||||
    }
 | 
			
		||||
    static inline void IndexFromCoor (std::vector<int>& coor,int &index,std::vector<int> &dims){
 | 
			
		||||
      int nd=dims.size();
 | 
			
		||||
      int stride=1;
 | 
			
		||||
      index=0;
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	index = index+stride*coor[d];
 | 
			
		||||
	stride=stride*dims[d];
 | 
			
		||||
      }
 | 
			
		||||
      Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    // SIMD lane addressing
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
@@ -119,13 +131,32 @@ public:
 | 
			
		||||
    }
 | 
			
		||||
    inline void iCoorFromIindex(std::vector<int> &coor,int lane)
 | 
			
		||||
    {
 | 
			
		||||
      CoorFromIndex(coor,lane,_simd_layout);
 | 
			
		||||
      Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
 | 
			
		||||
    }
 | 
			
		||||
    inline int PermuteDim(int dimension){
 | 
			
		||||
      return _simd_layout[dimension]>1;
 | 
			
		||||
    }
 | 
			
		||||
    inline int PermuteType(int dimension){
 | 
			
		||||
      int permute_type=0;
 | 
			
		||||
      //
 | 
			
		||||
      // FIXME:
 | 
			
		||||
      //
 | 
			
		||||
      // Best way to encode this would be to present a mask 
 | 
			
		||||
      // for which simd dimensions are rotated, and the rotation
 | 
			
		||||
      // size. If there is only one simd dimension rotated, this is just 
 | 
			
		||||
      // a permute. 
 | 
			
		||||
      //
 | 
			
		||||
      // Cases: PermuteType == 1,2,4,8
 | 
			
		||||
      // Distance should be either 0,1,2..
 | 
			
		||||
      //
 | 
			
		||||
      if ( _simd_layout[dimension] > 2 ) { 
 | 
			
		||||
	for(int d=0;d<_ndimension;d++){
 | 
			
		||||
	  if ( d != dimension ) assert ( (_simd_layout[d]==1)  );
 | 
			
		||||
	}
 | 
			
		||||
	permute_type = RotateBit; // How to specify distance; this is not just direction.
 | 
			
		||||
	return permute_type;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      for(int d=_ndimension-1;d>dimension;d--){
 | 
			
		||||
	if (_simd_layout[d]>1 ) permute_type++;
 | 
			
		||||
      }
 | 
			
		||||
@@ -135,12 +166,12 @@ public:
 | 
			
		||||
    // Array sizing queries
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    inline int iSites(void) { return _isites; };
 | 
			
		||||
    inline int Nsimd(void)  { return _isites; };// Synonymous with iSites
 | 
			
		||||
    inline int oSites(void) { return _osites; };
 | 
			
		||||
    inline int lSites(void) { return _isites*_osites; }; 
 | 
			
		||||
    inline int gSites(void) { return _isites*_osites*_Nprocessors; }; 
 | 
			
		||||
    inline int Nd    (void) { return _ndimension;};
 | 
			
		||||
    inline int iSites(void) const { return _isites; };
 | 
			
		||||
    inline int Nsimd(void)  const { return _isites; };// Synonymous with iSites
 | 
			
		||||
    inline int oSites(void) const { return _osites; };
 | 
			
		||||
    inline int lSites(void) const { return _isites*_osites; }; 
 | 
			
		||||
    inline int gSites(void) const { return _isites*_osites*_Nprocessors; }; 
 | 
			
		||||
    inline int Nd    (void) const { return _ndimension;};
 | 
			
		||||
 | 
			
		||||
    inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;};
 | 
			
		||||
    inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;};
 | 
			
		||||
@@ -151,7 +182,10 @@ public:
 | 
			
		||||
    // Global addressing
 | 
			
		||||
    ////////////////////////////////////////////////////////////////
 | 
			
		||||
    void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
 | 
			
		||||
      CoorFromIndex(gcoor,gidx,_gdimensions);
 | 
			
		||||
      Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
 | 
			
		||||
    }
 | 
			
		||||
    void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){
 | 
			
		||||
      Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
 | 
			
		||||
    }
 | 
			
		||||
    void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
 | 
			
		||||
      gidx=0;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cartesian/Cartesian_full.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_FULL_H
 | 
			
		||||
#define GRID_CARTESIAN_FULL_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cartesian/Cartesian_red_black.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_CARTESIAN_RED_BLACK_H
 | 
			
		||||
#define GRID_CARTESIAN_RED_BLACK_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_base.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_COMMUNICATOR_BASE_H
 | 
			
		||||
#define GRID_COMMUNICATOR_BASE_H
 | 
			
		||||
 | 
			
		||||
@@ -7,6 +34,9 @@
 | 
			
		||||
#ifdef GRID_COMMS_MPI
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_COMMS_SHMEM
 | 
			
		||||
#include <mpp/shmem.h>
 | 
			
		||||
#endif
 | 
			
		||||
namespace Grid {
 | 
			
		||||
class CartesianCommunicator {
 | 
			
		||||
  public:    
 | 
			
		||||
@@ -26,6 +56,8 @@ class CartesianCommunicator {
 | 
			
		||||
    typedef int CommsRequest_t;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    static void Init(int *argc, char ***argv);
 | 
			
		||||
 | 
			
		||||
    // Constructor
 | 
			
		||||
    CartesianCommunicator(const std::vector<int> &pdimensions_in);
 | 
			
		||||
 | 
			
		||||
@@ -54,6 +86,7 @@ class CartesianCommunicator {
 | 
			
		||||
    void GlobalSumVector(RealD *,int N);
 | 
			
		||||
 | 
			
		||||
    void GlobalSum(uint32_t &);
 | 
			
		||||
    void GlobalSum(uint64_t &);
 | 
			
		||||
 | 
			
		||||
    void GlobalSum(ComplexF &c)
 | 
			
		||||
    {
 | 
			
		||||
@@ -88,12 +121,11 @@ class CartesianCommunicator {
 | 
			
		||||
			int recv_from_rank,
 | 
			
		||||
			int bytes);
 | 
			
		||||
 | 
			
		||||
    void RecvFrom(void *recv,
 | 
			
		||||
		  int recv_from_rank,
 | 
			
		||||
		  int bytes);
 | 
			
		||||
    void SendTo(void *xmit,
 | 
			
		||||
		int xmit_to_rank,
 | 
			
		||||
		int bytes);
 | 
			
		||||
    void SendRecvPacket(void *xmit,
 | 
			
		||||
			void *recv,
 | 
			
		||||
			int xmit_to_rank,
 | 
			
		||||
			int recv_from_rank,
 | 
			
		||||
			int bytes);
 | 
			
		||||
 | 
			
		||||
    void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
			 void *xmit,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,49 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_mpi.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
#include <mpi.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  // Should error check all MPI calls.
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
			
		||||
  int flag;
 | 
			
		||||
  MPI_Initialized(&flag); // needed to coexist with other libs apparently
 | 
			
		||||
  if ( !flag ) {
 | 
			
		||||
    MPI_Init(argc,argv);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  int Rank(void) {
 | 
			
		||||
    int pe;
 | 
			
		||||
    MPI_Comm_rank(MPI_COMM_WORLD,&pe);
 | 
			
		||||
    return pe;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
{
 | 
			
		||||
@@ -13,6 +53,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
  _Nprocessors=1;
 | 
			
		||||
  _processors = processors;
 | 
			
		||||
  _processor_coor.resize(_ndimension);
 | 
			
		||||
  std::cout << processors << std::endl;
 | 
			
		||||
  
 | 
			
		||||
  MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator);
 | 
			
		||||
  MPI_Comm_rank(communicator,&_processor);
 | 
			
		||||
@@ -32,6 +73,10 @@ void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(float &f){
 | 
			
		||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
@@ -81,21 +126,22 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromComplete(reqs);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::RecvFrom(void *recv,
 | 
			
		||||
				     int from,
 | 
			
		||||
				     int bytes) 
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int sender,
 | 
			
		||||
					   int receiver,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  MPI_Status stat;
 | 
			
		||||
  int ierr=MPI_Recv(recv, bytes, MPI_CHAR,from,from,communicator,&stat);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendTo(void *xmit,
 | 
			
		||||
				   int dest,
 | 
			
		||||
				   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  int rank = _processor; // used for tag; must know who it comes from
 | 
			
		||||
  int ierr = MPI_Send(xmit, bytes, MPI_CHAR,dest,_processor,communicator);
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
  assert(sender != receiver);
 | 
			
		||||
  int tag = sender;
 | 
			
		||||
  if ( _processor == sender ) {
 | 
			
		||||
    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
 | 
			
		||||
  }
 | 
			
		||||
  if ( _processor == receiver ) { 
 | 
			
		||||
    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,39 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_none.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char *** arv)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int Rank(void ){ return 0; };
 | 
			
		||||
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
{
 | 
			
		||||
  _processors = processors;
 | 
			
		||||
@@ -20,17 +53,14 @@ void CartesianCommunicator::GlobalSum(float &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(float *,int N){}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(double &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint64_t &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::RecvFrom(void *recv,
 | 
			
		||||
				     int recv_from_rank,
 | 
			
		||||
				     int bytes) 
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendTo(void *xmit,
 | 
			
		||||
				   int xmit_to_rank,
 | 
			
		||||
				   int bytes)
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int xmit_to_rank,
 | 
			
		||||
					   int recv_from_rank,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										334
									
								
								lib/communicator/Communicator_shmem.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										334
									
								
								lib/communicator/Communicator_shmem.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,334 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/communicator/Communicator_shmem.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
#include <mpp/shmem.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  // Should error check all MPI calls.
 | 
			
		||||
#define SHMEM_VET(addr) 
 | 
			
		||||
 | 
			
		||||
#define SHMEM_VET_DEBUG(addr) {				\
 | 
			
		||||
  if ( ! shmem_addr_accessible(addr,_processor) ) {\
 | 
			
		||||
    std::fprintf(stderr,"%d Inaccessible shmem address %lx %s %s\n",_processor,addr,__FUNCTION__,#addr); \
 | 
			
		||||
    BACKTRACEFILE();		   \
 | 
			
		||||
  }\
 | 
			
		||||
}
 | 
			
		||||
int Rank(void) {
 | 
			
		||||
  return shmem_my_pe();
 | 
			
		||||
}
 | 
			
		||||
typedef struct HandShake_t { 
 | 
			
		||||
  uint64_t seq_local;
 | 
			
		||||
  uint64_t seq_remote;
 | 
			
		||||
} HandShake;
 | 
			
		||||
 | 
			
		||||
static Vector< HandShake > XConnections;
 | 
			
		||||
static Vector< HandShake > RConnections;
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
			
		||||
  shmem_init();
 | 
			
		||||
  XConnections.resize(shmem_n_pes());
 | 
			
		||||
  RConnections.resize(shmem_n_pes());
 | 
			
		||||
  for(int pe =0 ; pe<shmem_n_pes();pe++){
 | 
			
		||||
    XConnections[pe].seq_local = 0;
 | 
			
		||||
    XConnections[pe].seq_remote= 0;
 | 
			
		||||
    RConnections[pe].seq_local = 0;
 | 
			
		||||
    RConnections[pe].seq_remote= 0;
 | 
			
		||||
  }
 | 
			
		||||
  shmem_barrier_all();
 | 
			
		||||
}
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
{
 | 
			
		||||
  _ndimension = processors.size();
 | 
			
		||||
  std::vector<int> periodic(_ndimension,1);
 | 
			
		||||
 | 
			
		||||
  _Nprocessors=1;
 | 
			
		||||
  _processors = processors;
 | 
			
		||||
  _processor_coor.resize(_ndimension);
 | 
			
		||||
 | 
			
		||||
  _processor = shmem_my_pe();
 | 
			
		||||
  
 | 
			
		||||
  Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors);
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<_ndimension;i++){
 | 
			
		||||
    _Nprocessors*=_processors[i];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int Size = shmem_n_pes(); 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  assert(Size==_Nprocessors);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
			
		||||
  static long long source ;
 | 
			
		||||
  static long long dest   ;
 | 
			
		||||
  static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long      psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  //  int nreduce=1;
 | 
			
		||||
  //  int pestart=0;
 | 
			
		||||
  //  int logStride=0;
 | 
			
		||||
 | 
			
		||||
  source = u;
 | 
			
		||||
  dest   = 0;
 | 
			
		||||
  shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
  shmem_barrier_all(); // necessary?
 | 
			
		||||
  u = dest;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
			
		||||
  static long long source ;
 | 
			
		||||
  static long long dest   ;
 | 
			
		||||
  static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long      psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  //  int nreduce=1;
 | 
			
		||||
  //  int pestart=0;
 | 
			
		||||
  //  int logStride=0;
 | 
			
		||||
 | 
			
		||||
  source = u;
 | 
			
		||||
  dest   = 0;
 | 
			
		||||
  shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
  shmem_barrier_all(); // necessary?
 | 
			
		||||
  u = dest;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(float &f){
 | 
			
		||||
  static float source ;
 | 
			
		||||
  static float dest   ;
 | 
			
		||||
  static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  source = f;
 | 
			
		||||
  dest   =0.0;
 | 
			
		||||
  shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
  f = dest;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(float *f,int N)
 | 
			
		||||
{
 | 
			
		||||
  static float source ;
 | 
			
		||||
  static float dest   = 0 ;
 | 
			
		||||
  static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  if ( shmem_addr_accessible(f,_processor)  ){
 | 
			
		||||
    shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    dest   =0.0;
 | 
			
		||||
    source = f[i];
 | 
			
		||||
    shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
    f[i] = dest;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSum(double &d)
 | 
			
		||||
{
 | 
			
		||||
  static double source;
 | 
			
		||||
  static double dest  ;
 | 
			
		||||
  static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  source = d;
 | 
			
		||||
  dest   = 0;
 | 
			
		||||
  shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
  d = dest;
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(double *d,int N)
 | 
			
		||||
{
 | 
			
		||||
  static double source ;
 | 
			
		||||
  static double dest   ;
 | 
			
		||||
  static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
 | 
			
		||||
  if ( shmem_addr_accessible(d,_processor)  ){
 | 
			
		||||
    shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for(int i=0;i<N;i++){
 | 
			
		||||
    source = d[i];
 | 
			
		||||
    dest   =0.0;
 | 
			
		||||
    shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync);
 | 
			
		||||
    d[i] = dest;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
 | 
			
		||||
{
 | 
			
		||||
  std::vector<int> coor = _processor_coor;
 | 
			
		||||
 | 
			
		||||
  assert(std::abs(shift) <_processors[dim]);
 | 
			
		||||
 | 
			
		||||
  coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,source,_processors);
 | 
			
		||||
 | 
			
		||||
  coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,dest,_processors);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  int rank;
 | 
			
		||||
  Lexicographic::IndexFromCoor(coor,rank,_processors);
 | 
			
		||||
  return rank;
 | 
			
		||||
}
 | 
			
		||||
void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
 | 
			
		||||
{
 | 
			
		||||
  Lexicographic::CoorFromIndex(coor,rank,_processors);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
					   int dest,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int from,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  SHMEM_VET(xmit);
 | 
			
		||||
  SHMEM_VET(recv);
 | 
			
		||||
  std::vector<CommsRequest_t> reqs(0);
 | 
			
		||||
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
			
		||||
  SendToRecvFromComplete(reqs);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int sender,
 | 
			
		||||
					   int receiver,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
{
 | 
			
		||||
  static uint64_t seq;
 | 
			
		||||
 | 
			
		||||
  assert(recv!=xmit);
 | 
			
		||||
  volatile HandShake *RecvSeq = (volatile HandShake *) & RConnections[sender];
 | 
			
		||||
  volatile HandShake *SendSeq = (volatile HandShake *) & XConnections[receiver];
 | 
			
		||||
 | 
			
		||||
  if ( _processor == sender ) {
 | 
			
		||||
 | 
			
		||||
    printf("Sender SHMEM pt2pt %d -> %d\n",sender,receiver);
 | 
			
		||||
    // Check he has posted a receive
 | 
			
		||||
    while(SendSeq->seq_remote == SendSeq->seq_local);
 | 
			
		||||
 | 
			
		||||
    printf("Sender receive %d posted\n",sender,receiver);
 | 
			
		||||
 | 
			
		||||
    // Advance our send count
 | 
			
		||||
    seq = ++(SendSeq->seq_local);
 | 
			
		||||
    
 | 
			
		||||
    // Send this packet 
 | 
			
		||||
    SHMEM_VET(recv);
 | 
			
		||||
    shmem_putmem(recv,xmit,bytes,receiver);
 | 
			
		||||
    shmem_fence();
 | 
			
		||||
 | 
			
		||||
    printf("Sender sent payload %d\n",seq);
 | 
			
		||||
    //Notify him we're done
 | 
			
		||||
    shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
 | 
			
		||||
    shmem_fence();
 | 
			
		||||
    printf("Sender ringing door bell  %d\n",seq);
 | 
			
		||||
  }
 | 
			
		||||
  if ( _processor == receiver ) {
 | 
			
		||||
 | 
			
		||||
    printf("Receiver SHMEM pt2pt %d->%d\n",sender,receiver);
 | 
			
		||||
    // Post a receive
 | 
			
		||||
    seq = ++(RecvSeq->seq_local);
 | 
			
		||||
    shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);
 | 
			
		||||
 | 
			
		||||
    printf("Receiver Opening letter box %d\n",seq);
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    // Now wait until he has advanced our reception counter
 | 
			
		||||
    while(RecvSeq->seq_remote != RecvSeq->seq_local);
 | 
			
		||||
 | 
			
		||||
    printf("Receiver Got the mail %d\n",seq);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
{
 | 
			
		||||
  SHMEM_VET(xmit);
 | 
			
		||||
  SHMEM_VET(recv);
 | 
			
		||||
  //  shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
 | 
			
		||||
  shmem_putmem(recv,xmit,bytes,dest);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
			
		||||
{
 | 
			
		||||
  //  shmem_quiet();      // I'm done
 | 
			
		||||
  shmem_barrier_all();// He's done too
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::Barrier(void)
 | 
			
		||||
{
 | 
			
		||||
  shmem_barrier_all();
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
  static uint32_t word;
 | 
			
		||||
  uint32_t *array = (uint32_t *) data;
 | 
			
		||||
  assert( (bytes % 4)==0);
 | 
			
		||||
  int words = bytes/4;
 | 
			
		||||
 | 
			
		||||
  if ( shmem_addr_accessible(data,_processor)  ){
 | 
			
		||||
    shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for(int w=0;w<words;w++){
 | 
			
		||||
    word = array[w];
 | 
			
		||||
    shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
 | 
			
		||||
    if ( shmem_my_pe() != root ) {
 | 
			
		||||
      array[w] = word;
 | 
			
		||||
    }
 | 
			
		||||
    shmem_barrier_all();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
{
 | 
			
		||||
  static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
 | 
			
		||||
  static uint32_t word;
 | 
			
		||||
  uint32_t *array = (uint32_t *) data;
 | 
			
		||||
  assert( (bytes % 4)==0);
 | 
			
		||||
  int words = bytes/4;
 | 
			
		||||
 | 
			
		||||
  for(int w=0;w<words;w++){
 | 
			
		||||
    word = array[w];
 | 
			
		||||
    shmem_broadcast32((void *)&word,(void *)&word,1,root,0,0,shmem_n_pes(),psync);
 | 
			
		||||
    if ( shmem_my_pe() != root ) {
 | 
			
		||||
      array[w]= word;
 | 
			
		||||
    }
 | 
			
		||||
    shmem_barrier_all();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cshift/Cshift_common.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_CSHIFT_COMMON_H_
 | 
			
		||||
#define _GRID_CSHIFT_COMMON_H_
 | 
			
		||||
 | 
			
		||||
@@ -8,7 +35,7 @@ class SimpleCompressor {
 | 
			
		||||
public:
 | 
			
		||||
  void Point(int) {};
 | 
			
		||||
 | 
			
		||||
  vobj operator() (const vobj &arg,int dimension,int plane,int osite,GridBase *grid) {
 | 
			
		||||
  vobj operator() (const vobj &arg) {
 | 
			
		||||
    return arg;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
@@ -17,7 +44,7 @@ public:
 | 
			
		||||
// Gather for when there is no need to SIMD split with compression
 | 
			
		||||
///////////////////////////////////////////////////////////////////
 | 
			
		||||
template<class vobj,class cobj,class compressor> void 
 | 
			
		||||
Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress)
 | 
			
		||||
Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -29,24 +56,24 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
 | 
			
		||||
  
 | 
			
		||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
			
		||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
  int stride=rhs._grid->_slice_stride[dimension];
 | 
			
		||||
  if ( cbmask == 0x3 ) { 
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
	int o  = n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
	int bo = n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
	buffer[bo+b]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
	int o  = n*stride;
 | 
			
		||||
	int bo = n*e2;
 | 
			
		||||
	buffer[off+bo+b]=compress(rhs._odata[so+o+b]);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  } else { 
 | 
			
		||||
     int bo=0;
 | 
			
		||||
     for(int n=0;n<e1;n++){
 | 
			
		||||
       for(int b=0;b<e2;b++){
 | 
			
		||||
	 int o  = n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
	 int o  = n*stride;
 | 
			
		||||
	 int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
 | 
			
		||||
	 if ( ocb &cbmask ) {
 | 
			
		||||
	   buffer[bo++]=compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
	   buffer[off+bo++]=compress(rhs._odata[so+o+b]);
 | 
			
		||||
	 }
 | 
			
		||||
       }
 | 
			
		||||
     }
 | 
			
		||||
@@ -70,16 +97,16 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
 | 
			
		||||
 | 
			
		||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
			
		||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
			
		||||
  
 | 
			
		||||
  int n1=rhs._grid->_slice_stride[dimension];
 | 
			
		||||
  int n2=rhs._grid->_slice_block[dimension];
 | 
			
		||||
  if ( cbmask ==0x3){
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
 | 
			
		||||
	int o=n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
	int offset = b+n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
	cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
	int o      =   n*n1;
 | 
			
		||||
	int offset = b+n*n2;
 | 
			
		||||
	cobj temp =compress(rhs._odata[so+o+b]);
 | 
			
		||||
	extract<cobj>(temp,pointers,offset);
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
@@ -94,7 +121,7 @@ PARALLEL_NESTED_LOOP2
 | 
			
		||||
	int offset = b+n*rhs._grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
	if ( ocb & cbmask ) {
 | 
			
		||||
	  cobj temp =compress(rhs._odata[so+o+b],dimension,plane,so+o+b,rhs._grid);
 | 
			
		||||
	  cobj temp =compress(rhs._odata[so+o+b]);
 | 
			
		||||
	  extract<cobj>(temp,pointers,offset);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
@@ -216,13 +243,13 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
 | 
			
		||||
 | 
			
		||||
  int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
 | 
			
		||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
			
		||||
 | 
			
		||||
  int stride = rhs._grid->_slice_stride[dimension];
 | 
			
		||||
  if(cbmask == 0x3 ){
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
 
 | 
			
		||||
        int o =n*rhs._grid->_slice_stride[dimension]+b;
 | 
			
		||||
        int o =n*stride+b;
 | 
			
		||||
  	//lhs._odata[lo+o]=rhs._odata[ro+o];
 | 
			
		||||
	vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
 | 
			
		||||
      }
 | 
			
		||||
@@ -232,7 +259,7 @@ PARALLEL_NESTED_LOOP2
 | 
			
		||||
    for(int n=0;n<e1;n++){
 | 
			
		||||
      for(int b=0;b<e2;b++){
 | 
			
		||||
 
 | 
			
		||||
        int o =n*rhs._grid->_slice_stride[dimension]+b;
 | 
			
		||||
        int o =n*stride+b;
 | 
			
		||||
        int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
 | 
			
		||||
        if ( ocb&cbmask ) {
 | 
			
		||||
  	//lhs._odata[lo+o]=rhs._odata[ro+o];
 | 
			
		||||
@@ -258,11 +285,12 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
 | 
			
		||||
 | 
			
		||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
			
		||||
  int e2=rhs._grid->_slice_block [dimension];
 | 
			
		||||
  int stride = rhs._grid->_slice_stride[dimension];
 | 
			
		||||
PARALLEL_NESTED_LOOP2
 | 
			
		||||
  for(int n=0;n<e1;n++){
 | 
			
		||||
  for(int b=0;b<e2;b++){
 | 
			
		||||
 | 
			
		||||
      int o  =n*rhs._grid->_slice_stride[dimension];
 | 
			
		||||
      int o  =n*stride;
 | 
			
		||||
      int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
 | 
			
		||||
      if ( ocb&cbmask ) {
 | 
			
		||||
	permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
 | 
			
		||||
@@ -296,6 +324,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
			
		||||
  int rd = grid->_rdimensions[dimension];
 | 
			
		||||
  int ld = grid->_ldimensions[dimension];
 | 
			
		||||
  int gd = grid->_gdimensions[dimension];
 | 
			
		||||
  int ly = grid->_simd_layout[dimension];
 | 
			
		||||
 | 
			
		||||
  // Map to always positive shift modulo global full dimension.
 | 
			
		||||
  shift = (shift+fd)%fd;
 | 
			
		||||
@@ -304,6 +333,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
			
		||||
  // the permute type
 | 
			
		||||
  int permute_dim =grid->PermuteDim(dimension);
 | 
			
		||||
  int permute_type=grid->PermuteType(dimension);
 | 
			
		||||
  int permute_type_dist;
 | 
			
		||||
 | 
			
		||||
  for(int x=0;x<rd;x++){       
 | 
			
		||||
 | 
			
		||||
@@ -315,15 +345,31 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
			
		||||
    int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 | 
			
		||||
    int sx     = (x+sshift)%rd;
 | 
			
		||||
 | 
			
		||||
    // FIXME : This must change where we have a 
 | 
			
		||||
    // Rotate slice.
 | 
			
		||||
    
 | 
			
		||||
    // Document how this works ; why didn't I do this when I first wrote it...
 | 
			
		||||
    // wrap is whether sshift > rd.
 | 
			
		||||
    //  num is sshift mod rd.
 | 
			
		||||
    // 
 | 
			
		||||
    int permute_slice=0;
 | 
			
		||||
    if(permute_dim){
 | 
			
		||||
      int wrap = sshift/rd;
 | 
			
		||||
      int  num = sshift%rd;
 | 
			
		||||
 | 
			
		||||
      if ( x< rd-num ) permute_slice=wrap;
 | 
			
		||||
      else permute_slice = 1-wrap;
 | 
			
		||||
      else permute_slice = (wrap+1)%ly;
 | 
			
		||||
 | 
			
		||||
      if ( (ly>2) && (permute_slice) ) {
 | 
			
		||||
	assert(permute_type & RotateBit);
 | 
			
		||||
	permute_type_dist = permute_type|permute_slice;
 | 
			
		||||
      } else {
 | 
			
		||||
	permute_type_dist = permute_type;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type);
 | 
			
		||||
    if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
 | 
			
		||||
    else                 Copy_plane(ret,rhs,dimension,x,sx,cbmask); 
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cshift/Cshift_mpi.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_CSHIFT_MPI_H_
 | 
			
		||||
#define _GRID_CSHIFT_MPI_H_
 | 
			
		||||
 | 
			
		||||
@@ -163,11 +191,12 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
 | 
			
		||||
  int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
 | 
			
		||||
  int words = sizeof(vobj)/sizeof(vector_type);
 | 
			
		||||
 | 
			
		||||
  std::vector<std::vector<scalar_object> > send_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
 | 
			
		||||
  std::vector<std::vector<scalar_object> > recv_buf_extract(Nsimd,std::vector<scalar_object>(buffer_size) );
 | 
			
		||||
  std::vector<Vector<scalar_object> >   send_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
 | 
			
		||||
  std::vector<Vector<scalar_object> >   recv_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
 | 
			
		||||
 | 
			
		||||
  int bytes = buffer_size*sizeof(scalar_object);
 | 
			
		||||
 | 
			
		||||
  std::vector<scalar_object *>  pointers(Nsimd);  // 
 | 
			
		||||
  std::vector<scalar_object *>  pointers(Nsimd); // 
 | 
			
		||||
  std::vector<scalar_object *> rpointers(Nsimd); // received pointers
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/cshift/Cshift_none.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef _GRID_CSHIFT_NONE_H_
 | 
			
		||||
#define _GRID_CSHIFT_NONE_H_
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_ET.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_ET_H
 | 
			
		||||
#define GRID_LATTICE_ET_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_arith.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_ARITH_H
 | 
			
		||||
#define GRID_LATTICE_ARITH_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_base.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_BASE_H
 | 
			
		||||
#define GRID_LATTICE_BASE_H
 | 
			
		||||
 | 
			
		||||
@@ -26,7 +55,13 @@ extern int GridCshiftPermuteMap[4][16];
 | 
			
		||||
// Basic expressions used in Expression Template
 | 
			
		||||
////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
class LatticeBase {};
 | 
			
		||||
class LatticeBase
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
    virtual ~LatticeBase(void) = default;
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
};
 | 
			
		||||
    
 | 
			
		||||
class LatticeExpressionBase {};
 | 
			
		||||
 | 
			
		||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;               // Aligned allocator??
 | 
			
		||||
@@ -59,8 +94,6 @@ template<class vobj>
 | 
			
		||||
class Lattice : public LatticeBase
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
    int checkerboard;
 | 
			
		||||
    Vector<vobj> _odata;
 | 
			
		||||
    
 | 
			
		||||
@@ -73,7 +106,6 @@ public:
 | 
			
		||||
    typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
    typedef typename vobj::vector_type vector_type;
 | 
			
		||||
    typedef vobj vector_object;
 | 
			
		||||
 
 | 
			
		||||
   
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Expression Template closure support
 | 
			
		||||
@@ -149,8 +181,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
  }
 | 
			
		||||
  //GridFromExpression is tricky to do
 | 
			
		||||
  template<class Op,class T1>
 | 
			
		||||
    Lattice(const LatticeUnaryExpression<Op,T1> & expr):    _grid(nullptr){
 | 
			
		||||
 | 
			
		||||
    Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -171,7 +203,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Op,class T1, class T2>
 | 
			
		||||
  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr):    _grid(nullptr){
 | 
			
		||||
  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -192,7 +225,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Op,class T1, class T2, class T3>
 | 
			
		||||
  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr):    _grid(nullptr){
 | 
			
		||||
  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -212,14 +246,17 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    // Constructor requires "grid" passed.
 | 
			
		||||
    // what about a default grid?
 | 
			
		||||
    //////////////////////////////////////////////////////////////////
 | 
			
		||||
    Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
 | 
			
		||||
      //        _odata.reserve(_grid->oSites());
 | 
			
		||||
      //        _odata.resize(_grid->oSites());
 | 
			
		||||
    Lattice(GridBase *grid) : _odata(grid->oSites()) {
 | 
			
		||||
        _grid = grid;
 | 
			
		||||
    //        _odata.reserve(_grid->oSites());
 | 
			
		||||
    //        _odata.resize(_grid->oSites());
 | 
			
		||||
    //      std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
 | 
			
		||||
        assert((((uint64_t)&_odata[0])&0xF) ==0);
 | 
			
		||||
        checkerboard=0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    virtual ~Lattice(void) = default;
 | 
			
		||||
    
 | 
			
		||||
    template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_comparison.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_COMPARISON_H
 | 
			
		||||
#define GRID_LATTICE_COMPARISON_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_comparison_utils.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_COMPARISON_H
 | 
			
		||||
#define GRID_COMPARISON_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_conformable.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_CONFORMABLE_H
 | 
			
		||||
#define GRID_LATTICE_CONFORMABLE_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_coordinate.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_COORDINATE_H
 | 
			
		||||
#define GRID_LATTICE_COORDINATE_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_local.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_LOCALREDUCTION_H
 | 
			
		||||
#define GRID_LATTICE_LOCALREDUCTION_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_overload.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_OVERLOAD_H
 | 
			
		||||
#define GRID_LATTICE_OVERLOAD_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_peekpoke.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_PEEK_H
 | 
			
		||||
#define GRID_LATTICE_PEEK_H
 | 
			
		||||
 | 
			
		||||
@@ -123,7 +152,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    // Peek a scalar object from the SIMD array
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    template<class vobj,class sobj>
 | 
			
		||||
    void peekLocalSite(sobj &s,Lattice<vobj> &l,std::vector<int> &site){
 | 
			
		||||
    void peekLocalSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){
 | 
			
		||||
        
 | 
			
		||||
      GridBase *grid=l._grid;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_reality.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_REALITY_H
 | 
			
		||||
#define GRID_LATTICE_REALITY_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_reduction.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_REDUCTION_H
 | 
			
		||||
#define GRID_LATTICE_REDUCTION_H
 | 
			
		||||
 | 
			
		||||
@@ -19,7 +48,6 @@ namespace Grid {
 | 
			
		||||
    {
 | 
			
		||||
      typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
      typedef typename vobj::vector_type vector_type;
 | 
			
		||||
      vector_type vnrm;
 | 
			
		||||
      scalar_type  nrm;
 | 
			
		||||
 | 
			
		||||
      GridBase *grid = left._grid;
 | 
			
		||||
@@ -31,7 +59,6 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
      for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
			
		||||
 | 
			
		||||
	int nwork, mywork, myoff;
 | 
			
		||||
	GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
 | 
			
		||||
	
 | 
			
		||||
@@ -125,7 +152,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
 | 
			
		||||
  assert(grid!=NULL);
 | 
			
		||||
 | 
			
		||||
  // FIXME
 | 
			
		||||
  std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
 | 
			
		||||
  // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
 | 
			
		||||
 | 
			
		||||
  const int    Nd = grid->_ndimension;
 | 
			
		||||
  const int Nsimd = grid->Nsimd();
 | 
			
		||||
@@ -151,7 +178,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
 | 
			
		||||
  // sum over reduced dimension planes, breaking out orthog dir
 | 
			
		||||
 | 
			
		||||
  for(int ss=0;ss<grid->oSites();ss++){
 | 
			
		||||
    GridBase::CoorFromIndex(coor,ss,grid->_rdimensions);
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor,ss,grid->_rdimensions);
 | 
			
		||||
    int r = coor[orthogdim];
 | 
			
		||||
    lvSum[r]=lvSum[r]+Data._odata[ss];
 | 
			
		||||
  }  
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_rng.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_RNG_H
 | 
			
		||||
#define GRID_LATTICE_RNG_H
 | 
			
		||||
 | 
			
		||||
@@ -22,13 +50,16 @@ namespace Grid {
 | 
			
		||||
      assert(fine->_processors[d]==1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // local and global volumes subdivide cleanly after SIMDization
 | 
			
		||||
    int multiplicity=1;
 | 
			
		||||
    for(int d=0;d<lowerdims;d++){
 | 
			
		||||
      multiplicity=multiplicity*fine->_rdimensions[d];
 | 
			
		||||
    }
 | 
			
		||||
    // local and global volumes subdivide cleanly after SIMDization
 | 
			
		||||
    for(int d=0;d<rngdims;d++){
 | 
			
		||||
      int fd= d+lowerdims;
 | 
			
		||||
      assert(coarse->_processors[d]  == fine->_processors[fd]);
 | 
			
		||||
      assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
 | 
			
		||||
      assert((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d]==fine->_rdimensions[fd]); 
 | 
			
		||||
      assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]); 
 | 
			
		||||
 | 
			
		||||
      multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d]; 
 | 
			
		||||
    }
 | 
			
		||||
@@ -44,7 +75,7 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
    std::seed_seq src;
 | 
			
		||||
    
 | 
			
		||||
    fixedSeed(std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
 | 
			
		||||
    fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
 | 
			
		||||
 | 
			
		||||
    result_type operator () (void){
 | 
			
		||||
 | 
			
		||||
@@ -76,16 +107,41 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
   GridRNGbase() : _uniform{0,1}, _gaussian(0.0,1.0) {};
 | 
			
		||||
 | 
			
		||||
    int _seeded;
 | 
			
		||||
    // One generator per site.
 | 
			
		||||
    // Uniform and Gaussian distributions from these generators.
 | 
			
		||||
    std::vector<std::ranlux48>             _generators;
 | 
			
		||||
    std::uniform_real_distribution<double> _uniform;
 | 
			
		||||
    std::normal_distribution<double>       _gaussian;
 | 
			
		||||
 | 
			
		||||
#ifdef RNG_RANLUX
 | 
			
		||||
    typedef uint64_t      RngStateType;
 | 
			
		||||
    typedef std::ranlux48 RngEngine;
 | 
			
		||||
    static const int RngStateCount = 15;
 | 
			
		||||
#else
 | 
			
		||||
    typedef std::mt19937 RngEngine;
 | 
			
		||||
    typedef uint32_t     RngStateType;
 | 
			
		||||
    static const int     RngStateCount = std::mt19937::state_size;
 | 
			
		||||
#endif
 | 
			
		||||
    std::vector<RngEngine>                             _generators;
 | 
			
		||||
    std::vector<std::uniform_real_distribution<RealD>> _uniform;
 | 
			
		||||
    std::vector<std::normal_distribution<RealD>>       _gaussian;
 | 
			
		||||
    std::vector<std::discrete_distribution<int32_t>>     _bernoulli;
 | 
			
		||||
 | 
			
		||||
    void GetState(std::vector<RngStateType> & saved,int gen) {
 | 
			
		||||
      saved.resize(RngStateCount);
 | 
			
		||||
      std::stringstream ss;
 | 
			
		||||
      ss<<_generators[gen];
 | 
			
		||||
      ss.seekg(0,ss.beg);
 | 
			
		||||
      for(int i=0;i<RngStateCount;i++){
 | 
			
		||||
	ss>>saved[i];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    void SetState(std::vector<RngStateType> & saved,int gen){
 | 
			
		||||
      assert(saved.size()==RngStateCount);
 | 
			
		||||
      std::stringstream ss;
 | 
			
		||||
      for(int i=0;i<RngStateCount;i++){
 | 
			
		||||
	ss<< saved[i]<<" ";
 | 
			
		||||
      }
 | 
			
		||||
      ss.seekg(0,ss.beg);
 | 
			
		||||
      ss>>_generators[gen];
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  class GridSerialRNG : public GridRNGbase {
 | 
			
		||||
@@ -98,18 +154,21 @@ namespace Grid {
 | 
			
		||||
    {
 | 
			
		||||
      typename source::result_type init = src();
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&init,sizeof(init));
 | 
			
		||||
      _generators[0] = std::ranlux48(init);
 | 
			
		||||
      _generators[0] = RngEngine(init);
 | 
			
		||||
      _seeded=1;
 | 
			
		||||
    }    
 | 
			
		||||
 | 
			
		||||
    GridSerialRNG() : GridRNGbase() {
 | 
			
		||||
      _generators.resize(1);
 | 
			
		||||
      _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
 | 
			
		||||
      _gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
 | 
			
		||||
      _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
 | 
			
		||||
      _seeded=0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    template <class sobj,class distribution> inline void fill(sobj &l,distribution &dist){
 | 
			
		||||
    template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){
 | 
			
		||||
 | 
			
		||||
      typedef typename sobj::scalar_type scalar_type;
 | 
			
		||||
 
 | 
			
		||||
@@ -117,56 +176,65 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      scalar_type *buf = (scalar_type *) & l;
 | 
			
		||||
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int idx=0;idx<words;idx++){
 | 
			
		||||
	fillScalar(buf[idx],dist,_generators[0]);
 | 
			
		||||
	fillScalar(buf[idx],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template <class distribution>  inline void fill(ComplexF &l,distribution &dist){
 | 
			
		||||
      fillScalar(l,dist,_generators[0]);
 | 
			
		||||
    template <class distribution>  inline void fill(ComplexF &l,std::vector<distribution> &dist){
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      fillScalar(l,dist[0],_generators[0]);
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
    template <class distribution>  inline void fill(ComplexD &l,distribution &dist){
 | 
			
		||||
      fillScalar(l,dist,_generators[0]);
 | 
			
		||||
    template <class distribution>  inline void fill(ComplexD &l,std::vector<distribution> &dist){
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      fillScalar(l,dist[0],_generators[0]);
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
    template <class distribution>  inline void fill(RealF &l,distribution &dist){
 | 
			
		||||
      fillScalar(l,dist,_generators[0]);
 | 
			
		||||
    template <class distribution>  inline void fill(RealF &l,std::vector<distribution> &dist){
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      fillScalar(l,dist[0],_generators[0]);
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
    template <class distribution>  inline void fill(RealD &l,distribution &dist){
 | 
			
		||||
      fillScalar(l,dist,_generators[0]);
 | 
			
		||||
    template <class distribution>  inline void fill(RealD &l,std::vector<distribution> &dist){
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      fillScalar(l,dist[0],_generators[0]);
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
    // vector fill
 | 
			
		||||
    template <class distribution>  inline void fill(vComplexF &l,distribution &dist){
 | 
			
		||||
    template <class distribution>  inline void fill(vComplexF &l,std::vector<distribution> &dist){
 | 
			
		||||
      RealF *pointer=(RealF *)&l;
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int i=0;i<2*vComplexF::Nsimd();i++){
 | 
			
		||||
	fillScalar(pointer[i],dist,_generators[0]);
 | 
			
		||||
	fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
    template <class distribution>  inline void fill(vComplexD &l,distribution &dist){
 | 
			
		||||
    template <class distribution>  inline void fill(vComplexD &l,std::vector<distribution> &dist){
 | 
			
		||||
      RealD *pointer=(RealD *)&l;
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int i=0;i<2*vComplexD::Nsimd();i++){
 | 
			
		||||
	fillScalar(pointer[i],dist,_generators[0]);
 | 
			
		||||
	fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
    template <class distribution>  inline void fill(vRealF &l,distribution &dist){
 | 
			
		||||
    template <class distribution>  inline void fill(vRealF &l,std::vector<distribution> &dist){
 | 
			
		||||
      RealF *pointer=(RealF *)&l;
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int i=0;i<vRealF::Nsimd();i++){
 | 
			
		||||
	fillScalar(pointer[i],dist,_generators[0]);
 | 
			
		||||
	fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
    template <class distribution>  inline void fill(vRealD &l,distribution &dist){
 | 
			
		||||
    template <class distribution>  inline void fill(vRealD &l,std::vector<distribution> &dist){
 | 
			
		||||
      RealD *pointer=(RealD *)&l;
 | 
			
		||||
      dist[0].reset();
 | 
			
		||||
      for(int i=0;i<vRealD::Nsimd();i++){
 | 
			
		||||
	fillScalar(pointer[i],dist,_generators[0]);
 | 
			
		||||
	fillScalar(pointer[i],dist[0],_generators[0]);
 | 
			
		||||
      }
 | 
			
		||||
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
 | 
			
		||||
    }
 | 
			
		||||
@@ -176,7 +244,7 @@ namespace Grid {
 | 
			
		||||
      std::random_device rd;
 | 
			
		||||
      Seed(rd);
 | 
			
		||||
    }
 | 
			
		||||
    void SeedFixedIntegers(std::vector<int> &seeds){
 | 
			
		||||
    void SeedFixedIntegers(const std::vector<int> &seeds){
 | 
			
		||||
      fixedSeed src(seeds);
 | 
			
		||||
      Seed(src);
 | 
			
		||||
    }
 | 
			
		||||
@@ -185,12 +253,6 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
  class GridParallelRNG : public GridRNGbase {
 | 
			
		||||
  public:
 | 
			
		||||
    // One generator per site.
 | 
			
		||||
    std::vector<std::ranlux48>             _generators;
 | 
			
		||||
    
 | 
			
		||||
    // Uniform and Gaussian distributions from these generators.
 | 
			
		||||
    std::uniform_real_distribution<double> _uniform;
 | 
			
		||||
    std::normal_distribution<double>       _gaussian;
 | 
			
		||||
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
    int _vol;
 | 
			
		||||
@@ -202,7 +264,11 @@ namespace Grid {
 | 
			
		||||
    GridParallelRNG(GridBase *grid) : GridRNGbase() {
 | 
			
		||||
      _grid=grid;
 | 
			
		||||
      _vol =_grid->iSites()*_grid->oSites();
 | 
			
		||||
 | 
			
		||||
      _generators.resize(_vol);
 | 
			
		||||
      _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
 | 
			
		||||
      _gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
 | 
			
		||||
      _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
 | 
			
		||||
      _seeded=0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -220,7 +286,7 @@ namespace Grid {
 | 
			
		||||
      int gsites = _grid->_gsites;
 | 
			
		||||
 | 
			
		||||
      typename source::result_type init = src();
 | 
			
		||||
      std::ranlux48 pseeder(init);
 | 
			
		||||
      RngEngine pseeder(init);
 | 
			
		||||
      std::uniform_int_distribution<uint64_t> ui;
 | 
			
		||||
 | 
			
		||||
      for(int gidx=0;gidx<gsites;gidx++){
 | 
			
		||||
@@ -241,7 +307,7 @@ namespace Grid {
 | 
			
		||||
	if( rank == _grid->ThisRank() ){
 | 
			
		||||
	  fixedSeed ssrc(site_seeds);
 | 
			
		||||
	  typename source::result_type sinit = ssrc();
 | 
			
		||||
	  _generators[l_idx] = std::ranlux48(sinit);
 | 
			
		||||
	  _generators[l_idx] = RngEngine(sinit);
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
      _seeded=1;
 | 
			
		||||
@@ -251,7 +317,7 @@ namespace Grid {
 | 
			
		||||
    //void SaveState(const std::string<char> &file);
 | 
			
		||||
    //void LoadState(const std::string<char> &file);
 | 
			
		||||
 | 
			
		||||
    template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,distribution &dist){
 | 
			
		||||
    template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){
 | 
			
		||||
 | 
			
		||||
      typedef typename vobj::scalar_object scalar_object;
 | 
			
		||||
      typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
@@ -275,8 +341,9 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
	  for(int si=0;si<Nsimd;si++){
 | 
			
		||||
	    int gdx = generator_idx(ss,si); // index of generator state
 | 
			
		||||
	    scalar_type *pointer = (scalar_type *)&buf[si];
 | 
			
		||||
	    dist[gdx].reset();
 | 
			
		||||
	    for(int idx=0;idx<words;idx++){
 | 
			
		||||
	      fillScalar(pointer[idx],dist,_generators[gdx]);
 | 
			
		||||
	      fillScalar(pointer[idx],dist[gdx],_generators[gdx]);
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
 | 
			
		||||
@@ -290,7 +357,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
      std::random_device rd;
 | 
			
		||||
      Seed(rd);
 | 
			
		||||
    }
 | 
			
		||||
    void SeedFixedIntegers(std::vector<int> &seeds){
 | 
			
		||||
    void SeedFixedIntegers(const std::vector<int> &seeds){
 | 
			
		||||
      fixedSeed src(seeds);
 | 
			
		||||
      Seed(src);
 | 
			
		||||
    }
 | 
			
		||||
@@ -304,14 +371,22 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
  template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l){
 | 
			
		||||
    rng.fill(l,rng._gaussian);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){
 | 
			
		||||
    rng.fill(l,rng._bernoulli);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <class sobj> inline void random(GridSerialRNG &rng,sobj &l){
 | 
			
		||||
    rng.fill(l,rng._uniform);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l){
 | 
			
		||||
    rng.fill(l,rng._gaussian);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){
 | 
			
		||||
    rng.fill(l,rng._bernoulli);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_trace.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_TRACE_H
 | 
			
		||||
#define GRID_LATTICE_TRACE_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,30 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_transfer.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_TRANSFER_H
 | 
			
		||||
#define GRID_LATTICE_TRANSFER_H
 | 
			
		||||
 | 
			
		||||
@@ -17,7 +44,7 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // remove and insert a half checkerboard
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -88,9 +115,9 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
    int sc;
 | 
			
		||||
    std::vector<int> coor_c(_ndimension);
 | 
			
		||||
    std::vector<int> coor_f(_ndimension);
 | 
			
		||||
    GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
 | 
			
		||||
    GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<nbasis;i++) {
 | 
			
		||||
      
 | 
			
		||||
@@ -133,9 +160,9 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    std::vector<int> coor_c(_ndimension);
 | 
			
		||||
    std::vector<int> coor_f(_ndimension);
 | 
			
		||||
 | 
			
		||||
    GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
 | 
			
		||||
    GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
 | 
			
		||||
    // z = A x + y
 | 
			
		||||
    fineZ._odata[sf]=coarseA._odata[sc]*fineX._odata[sf]+fineY._odata[sf];
 | 
			
		||||
@@ -198,9 +225,9 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
 | 
			
		||||
    std::vector<int> coor_c(_ndimension);
 | 
			
		||||
    std::vector<int> coor_f(_ndimension);
 | 
			
		||||
 | 
			
		||||
    GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
 | 
			
		||||
    GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
 | 
			
		||||
    coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
 | 
			
		||||
 | 
			
		||||
@@ -284,9 +311,9 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
    std::vector<int> coor_c(_ndimension);
 | 
			
		||||
    std::vector<int> coor_f(_ndimension);
 | 
			
		||||
 | 
			
		||||
    GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
 | 
			
		||||
    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
 | 
			
		||||
    GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
    Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<nbasis;i++) {
 | 
			
		||||
      if(i==0) fineData._odata[sf]=coarseData._odata[sc](i) * Basis[i]._odata[sf];
 | 
			
		||||
@@ -298,6 +325,126 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
 | 
			
		||||
// Simd layouts need not match since we use peek/poke Local
 | 
			
		||||
template<class vobj,class vvobj>
 | 
			
		||||
void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  typedef typename vvobj::scalar_object ssobj;
 | 
			
		||||
 | 
			
		||||
  sobj s;
 | 
			
		||||
  ssobj ss;
 | 
			
		||||
 | 
			
		||||
  GridBase *ig = in._grid;
 | 
			
		||||
  GridBase *og = out._grid;
 | 
			
		||||
 | 
			
		||||
  int ni = ig->_ndimension;
 | 
			
		||||
  int no = og->_ndimension;
 | 
			
		||||
 | 
			
		||||
  assert(ni == no);
 | 
			
		||||
 | 
			
		||||
  for(int d=0;d<no;d++){
 | 
			
		||||
    assert(ig->_processors[d]  == og->_processors[d]);
 | 
			
		||||
    assert(ig->_ldimensions[d] == og->_ldimensions[d]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<ig->lSites();idx++){
 | 
			
		||||
    std::vector<int> lcoor(ni);
 | 
			
		||||
    ig->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
    peekLocalSite(s,in,lcoor);
 | 
			
		||||
    ss=s;
 | 
			
		||||
    pokeLocalSite(ss,out,lcoor);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  sobj s;
 | 
			
		||||
 | 
			
		||||
  GridBase *lg = lowDim._grid;
 | 
			
		||||
  GridBase *hg = higherDim._grid;
 | 
			
		||||
  int nl = lg->_ndimension;
 | 
			
		||||
  int nh = hg->_ndimension;
 | 
			
		||||
 | 
			
		||||
  assert(nl+1 == nh);
 | 
			
		||||
  assert(orthog<nh);
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
  assert(hg->_processors[orthog]==1);
 | 
			
		||||
 | 
			
		||||
  int dl; dl = 0;
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
    if ( d != orthog) {
 | 
			
		||||
      assert(lg->_processors[dl]  == hg->_processors[d]);
 | 
			
		||||
      assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
 | 
			
		||||
      dl++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<lg->lSites();idx++){
 | 
			
		||||
    std::vector<int> lcoor(nl);
 | 
			
		||||
    std::vector<int> hcoor(nh);
 | 
			
		||||
    lg->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
    dl=0;
 | 
			
		||||
    hcoor[orthog] = slice;
 | 
			
		||||
    for(int d=0;d<nh;d++){
 | 
			
		||||
      if ( d!=orthog ) { 
 | 
			
		||||
	hcoor[d]=lcoor[dl++];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    peekLocalSite(s,lowDim,lcoor);
 | 
			
		||||
    pokeLocalSite(s,higherDim,hcoor);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, int orthog)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  sobj s;
 | 
			
		||||
 | 
			
		||||
  GridBase *lg = lowDim._grid;
 | 
			
		||||
  GridBase *hg = higherDim._grid;
 | 
			
		||||
  int nl = lg->_ndimension;
 | 
			
		||||
  int nh = hg->_ndimension;
 | 
			
		||||
 | 
			
		||||
  assert(nl+1 == nh);
 | 
			
		||||
  assert(orthog<nh);
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
  assert(hg->_processors[orthog]==1);
 | 
			
		||||
 | 
			
		||||
  int dl; dl = 0;
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
    if ( d != orthog) {
 | 
			
		||||
      assert(lg->_processors[dl]  == hg->_processors[d]);
 | 
			
		||||
      assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
 | 
			
		||||
      dl++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<lg->lSites();idx++){
 | 
			
		||||
    std::vector<int> lcoor(nl);
 | 
			
		||||
    std::vector<int> hcoor(nh);
 | 
			
		||||
    lg->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
    dl=0;
 | 
			
		||||
    hcoor[orthog] = slice;
 | 
			
		||||
    for(int d=0;d<nh;d++){
 | 
			
		||||
      if ( d!=orthog ) { 
 | 
			
		||||
	hcoor[d]=lcoor[dl++];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    peekLocalSite(s,higherDim,hcoor);
 | 
			
		||||
    pokeLocalSite(s,lowDim,lcoor);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_transpose.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_TRANSPOSE_H
 | 
			
		||||
#define GRID_LATTICE_TRANSPOSE_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,33 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_unary.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: neo <cossu@post.kek.jp>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_UNARY_H
 | 
			
		||||
#define GRID_LATTICE_UNARY_H
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/lattice/Lattice_where.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_LATTICE_WHERE_H
 | 
			
		||||
#define GRID_LATTICE_WHERE_H
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,31 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/parallelIO/BinaryIO.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_BINARY_IO_H
 | 
			
		||||
#define GRID_BINARY_IO_H
 | 
			
		||||
 | 
			
		||||
@@ -107,7 +135,7 @@ class BinaryIO {
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template<class vobj,class fobj,class munger> static inline void Uint32Checksum(Lattice<vobj> lat,munger munge,uint32_t &csum)
 | 
			
		||||
  template<class vobj,class fobj,class munger> static inline void Uint32Checksum(Lattice<vobj> &lat,munger munge,uint32_t &csum)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename vobj::scalar_object sobj;
 | 
			
		||||
    GridBase *grid = lat._grid ;
 | 
			
		||||
@@ -118,7 +146,7 @@ class BinaryIO {
 | 
			
		||||
    csum = 0;
 | 
			
		||||
    std::vector<int> lcoor;
 | 
			
		||||
    for(int l=0;l<grid->lSites();l++){
 | 
			
		||||
      grid->CoorFromIndex(lcoor,l,grid->_ldimensions);
 | 
			
		||||
      Lexicographic::CoorFromIndex(lcoor,l,grid->_ldimensions);
 | 
			
		||||
      peekLocalSite(siteObj,lat,lcoor);
 | 
			
		||||
      munge(siteObj,fileObj,csum);
 | 
			
		||||
    }
 | 
			
		||||
@@ -140,6 +168,7 @@ class BinaryIO {
 | 
			
		||||
    GridBase *grid = Umu._grid;
 | 
			
		||||
 | 
			
		||||
    std::cout<< GridLogMessage<< "Serial read I/O "<< file<< std::endl;
 | 
			
		||||
    GridStopWatch timer; timer.Start();
 | 
			
		||||
 | 
			
		||||
    int ieee32big = (format == std::string("IEEE32BIG"));
 | 
			
		||||
    int ieee32    = (format == std::string("IEEE32"));
 | 
			
		||||
@@ -154,6 +183,7 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
    Umu = zero;
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    uint64_t bytes=0;
 | 
			
		||||
    fobj file_object;
 | 
			
		||||
    sobj munged;
 | 
			
		||||
    
 | 
			
		||||
@@ -166,7 +196,7 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	fin.read((char *)&file_object,sizeof(file_object));
 | 
			
		||||
	
 | 
			
		||||
	bytes += sizeof(file_object);
 | 
			
		||||
	if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee32)    le32toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
@@ -177,6 +207,10 @@ class BinaryIO {
 | 
			
		||||
      // The boss who read the file has their value poked
 | 
			
		||||
      pokeSite(munged,Umu,site);
 | 
			
		||||
    }}}}
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/ (double)timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -196,13 +230,14 @@ class BinaryIO {
 | 
			
		||||
    // Serialise through node zero
 | 
			
		||||
    //////////////////////////////////////////////////
 | 
			
		||||
    std::cout<< GridLogMessage<< "Serial write I/O "<< file<<std::endl;
 | 
			
		||||
    GridStopWatch timer; timer.Start();
 | 
			
		||||
 | 
			
		||||
    std::ofstream fout;
 | 
			
		||||
    if ( grid->IsBoss() ) {
 | 
			
		||||
      fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
 | 
			
		||||
      fout.seekp(offset);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    uint64_t bytes=0;
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    fobj file_object;
 | 
			
		||||
    sobj unmunged;
 | 
			
		||||
@@ -224,14 +259,126 @@ class BinaryIO {
 | 
			
		||||
	if(ieee32)    htole32_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	if(ieee64)    htole64_v((void *)&file_object,sizeof(file_object));
 | 
			
		||||
	
 | 
			
		||||
 | 
			
		||||
	// NB could gather an xstrip as an optimisation.
 | 
			
		||||
	fout.write((char *)&file_object,sizeof(file_object));
 | 
			
		||||
	bytes+=sizeof(file_object);
 | 
			
		||||
      }
 | 
			
		||||
    }}}}
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline uint32_t writeRNGSerial(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file,int offset)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename GridSerialRNG::RngStateType RngStateType;
 | 
			
		||||
    const int RngStateCount = GridSerialRNG::RngStateCount;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = parallel._grid;
 | 
			
		||||
    int gsites = grid->_gsites;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////
 | 
			
		||||
    // Serialise through node zero
 | 
			
		||||
    //////////////////////////////////////////////////
 | 
			
		||||
    std::cout<< GridLogMessage<< "Serial RNG write I/O "<< file<<std::endl;
 | 
			
		||||
 | 
			
		||||
    std::ofstream fout;
 | 
			
		||||
    if ( grid->IsBoss() ) {
 | 
			
		||||
      fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
 | 
			
		||||
      fout.seekp(offset);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    std::vector<RngStateType> saved(RngStateCount);
 | 
			
		||||
    int bytes = sizeof(RngStateType)*saved.size();
 | 
			
		||||
    std::vector<int> gcoor;
 | 
			
		||||
 | 
			
		||||
    for(int gidx=0;gidx<gsites;gidx++){
 | 
			
		||||
 | 
			
		||||
      int rank,o_idx,i_idx;
 | 
			
		||||
      grid->GlobalIndexToGlobalCoor(gidx,gcoor);
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
 | 
			
		||||
      int l_idx=parallel.generator_idx(o_idx,i_idx);
 | 
			
		||||
 | 
			
		||||
      if( rank == grid->ThisRank() ){
 | 
			
		||||
	//	std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
 | 
			
		||||
	parallel.GetState(saved,l_idx);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      grid->Broadcast(rank,(void *)&saved[0],bytes);
 | 
			
		||||
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
 | 
			
		||||
	fout.write((char *)&saved[0],bytes);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if ( grid->IsBoss() ) {
 | 
			
		||||
      serial.GetState(saved,0);
 | 
			
		||||
      Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
 | 
			
		||||
      fout.write((char *)&saved[0],bytes);
 | 
			
		||||
    }
 | 
			
		||||
    grid->Broadcast(0,(void *)&csum,sizeof(csum));
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
  static inline uint32_t readRNGSerial(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file,int offset)
 | 
			
		||||
  {
 | 
			
		||||
    typedef typename GridSerialRNG::RngStateType RngStateType;
 | 
			
		||||
    const int RngStateCount = GridSerialRNG::RngStateCount;
 | 
			
		||||
 | 
			
		||||
    GridBase *grid = parallel._grid;
 | 
			
		||||
    int gsites = grid->_gsites;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////
 | 
			
		||||
    // Serialise through node zero
 | 
			
		||||
    //////////////////////////////////////////////////
 | 
			
		||||
    std::cout<< GridLogMessage<< "Serial RNG read I/O "<< file<<std::endl;
 | 
			
		||||
 | 
			
		||||
    std::ifstream fin(file,std::ios::binary|std::ios::in);
 | 
			
		||||
    fin.seekg(offset);
 | 
			
		||||
    
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    std::vector<RngStateType> saved(RngStateCount);
 | 
			
		||||
    int bytes = sizeof(RngStateType)*saved.size();
 | 
			
		||||
    std::vector<int> gcoor;
 | 
			
		||||
 | 
			
		||||
    for(int gidx=0;gidx<gsites;gidx++){
 | 
			
		||||
 | 
			
		||||
      int rank,o_idx,i_idx;
 | 
			
		||||
      grid->GlobalIndexToGlobalCoor(gidx,gcoor);
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
 | 
			
		||||
      int l_idx=parallel.generator_idx(o_idx,i_idx);
 | 
			
		||||
 | 
			
		||||
      if ( grid->IsBoss() ) {
 | 
			
		||||
	fin.read((char *)&saved[0],bytes);
 | 
			
		||||
	Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      grid->Broadcast(0,(void *)&saved[0],bytes);
 | 
			
		||||
 | 
			
		||||
      if( rank == grid->ThisRank() ){
 | 
			
		||||
	parallel.SetState(saved,l_idx);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if ( grid->IsBoss() ) {
 | 
			
		||||
      fin.read((char *)&saved[0],bytes);
 | 
			
		||||
      serial.SetState(saved,0);
 | 
			
		||||
      Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    grid->Broadcast(0,(void *)&csum,sizeof(csum));
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  template<class vobj,class fobj,class munger>
 | 
			
		||||
  static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
 | 
			
		||||
  {
 | 
			
		||||
@@ -266,7 +413,7 @@ class BinaryIO {
 | 
			
		||||
    int IOnode = 1;
 | 
			
		||||
    for(int d=0;d<grid->_ndimension;d++) {
 | 
			
		||||
 | 
			
		||||
      if ( d==0 ) parallel[d] = 0;
 | 
			
		||||
      if ( d == 0 ) parallel[d] = 0;
 | 
			
		||||
      if (parallel[d]) {
 | 
			
		||||
	range[d] = grid->_ldimensions[d];
 | 
			
		||||
	start[d] = grid->_processor_coor[d]*range[d];
 | 
			
		||||
@@ -284,6 +431,7 @@ class BinaryIO {
 | 
			
		||||
    {
 | 
			
		||||
      uint32_t tmp = IOnode;
 | 
			
		||||
      grid->GlobalSum(tmp);
 | 
			
		||||
      std::cout<< std::dec ;
 | 
			
		||||
      std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
 | 
			
		||||
      for(int d=0;d<grid->_ndimension;d++){
 | 
			
		||||
	std::cout<< range[d];
 | 
			
		||||
@@ -293,6 +441,9 @@ class BinaryIO {
 | 
			
		||||
      std::cout << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    GridStopWatch timer; timer.Start();
 | 
			
		||||
    uint64_t bytes=0;
 | 
			
		||||
 | 
			
		||||
    int myrank = grid->ThisRank();
 | 
			
		||||
    int iorank = grid->RankFromProcessorCoor(ioproc);
 | 
			
		||||
 | 
			
		||||
@@ -306,9 +457,9 @@ class BinaryIO {
 | 
			
		||||
    // available (how short sighted is that?)
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    Umu = zero;
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    static uint32_t csum=0;
 | 
			
		||||
    fobj fileObj;
 | 
			
		||||
    sobj siteObj;
 | 
			
		||||
    static sobj siteObj; // Static to place in symmetric region for SHMEM
 | 
			
		||||
 | 
			
		||||
      // need to implement these loops in Nd independent way with a lexico conversion
 | 
			
		||||
    for(int tlex=0;tlex<slice_vol;tlex++){
 | 
			
		||||
@@ -318,7 +469,7 @@ class BinaryIO {
 | 
			
		||||
      std::vector<int> lsite(nd);
 | 
			
		||||
      std::vector<int> iosite(nd);
 | 
			
		||||
 | 
			
		||||
      grid->CoorFromIndex(tsite,tlex,range);
 | 
			
		||||
      Lexicographic::CoorFromIndex(tsite,tlex,range);
 | 
			
		||||
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
 | 
			
		||||
@@ -328,7 +479,7 @@ class BinaryIO {
 | 
			
		||||
      /////////////////////////
 | 
			
		||||
      // Get the rank of owner of data
 | 
			
		||||
      /////////////////////////
 | 
			
		||||
	int rank, o_idx,i_idx, g_idx;
 | 
			
		||||
      int rank, o_idx,i_idx, g_idx;
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite);
 | 
			
		||||
      grid->GlobalCoorToGlobalIndex(gsite,g_idx);
 | 
			
		||||
      
 | 
			
		||||
@@ -339,6 +490,7 @@ class BinaryIO {
 | 
			
		||||
	
 | 
			
		||||
	fin.seekg(offset+g_idx*sizeof(fileObj));
 | 
			
		||||
	fin.read((char *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	bytes+=sizeof(fileObj);
 | 
			
		||||
	
 | 
			
		||||
	if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	if(ieee32)    le32toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
@@ -346,23 +498,29 @@ class BinaryIO {
 | 
			
		||||
	if(ieee64)    le64toh_v((void *)&fileObj,sizeof(fileObj));
 | 
			
		||||
	
 | 
			
		||||
	munge(fileObj,siteObj,csum);
 | 
			
		||||
	
 | 
			
		||||
	if ( rank != myrank ) {
 | 
			
		||||
	  grid->SendTo((void *)&siteObj,rank,sizeof(siteObj));
 | 
			
		||||
	} else { 
 | 
			
		||||
	  pokeLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
 | 
			
		||||
      }	
 | 
			
		||||
 | 
			
		||||
      // Possibly do transport through pt2pt 
 | 
			
		||||
      if ( rank != iorank ) { 
 | 
			
		||||
	if ( (myrank == rank) || (myrank==iorank) ) {
 | 
			
		||||
	  grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
 | 
			
		||||
	}
 | 
			
		||||
	 
 | 
			
		||||
      } else { 
 | 
			
		||||
	if ( myrank == rank ) {
 | 
			
		||||
	  grid->RecvFrom((void *)&siteObj,iorank,sizeof(siteObj));
 | 
			
		||||
      }
 | 
			
		||||
      // Poke at destination
 | 
			
		||||
      if ( myrank == rank ) {
 | 
			
		||||
	  pokeLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
	} 
 | 
			
		||||
      }
 | 
			
		||||
      grid->Barrier(); // necessary?
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    grid->GlobalSum(csum);
 | 
			
		||||
    grid->GlobalSum(bytes);
 | 
			
		||||
    grid->Barrier();
 | 
			
		||||
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
    
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
@@ -397,7 +555,7 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
    for(int d=0;d<grid->_ndimension;d++) {
 | 
			
		||||
 | 
			
		||||
      if ( d==0 ) parallel[d] = 0;
 | 
			
		||||
      if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
 | 
			
		||||
 | 
			
		||||
      if (parallel[d]) {
 | 
			
		||||
	range[d] = grid->_ldimensions[d];
 | 
			
		||||
@@ -426,6 +584,9 @@ class BinaryIO {
 | 
			
		||||
      std::cout << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    GridStopWatch timer; timer.Start();
 | 
			
		||||
    uint64_t bytes=0;
 | 
			
		||||
 | 
			
		||||
    int myrank = grid->ThisRank();
 | 
			
		||||
    int iorank = grid->RankFromProcessorCoor(ioproc);
 | 
			
		||||
 | 
			
		||||
@@ -444,10 +605,10 @@ class BinaryIO {
 | 
			
		||||
 | 
			
		||||
    uint32_t csum=0;
 | 
			
		||||
    fobj fileObj;
 | 
			
		||||
    sobj siteObj;
 | 
			
		||||
    static sobj siteObj; // static for SHMEM target; otherwise dynamic allocate with AlignedAllocator
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      // need to implement these loops in Nd independent way with a lexico conversion
 | 
			
		||||
    // should aggregate a whole chunk and then write.
 | 
			
		||||
    // need to implement these loops in Nd independent way with a lexico conversion
 | 
			
		||||
    for(int tlex=0;tlex<slice_vol;tlex++){
 | 
			
		||||
	
 | 
			
		||||
      std::vector<int> tsite(nd); // temporary mixed up site
 | 
			
		||||
@@ -455,7 +616,7 @@ class BinaryIO {
 | 
			
		||||
      std::vector<int> lsite(nd);
 | 
			
		||||
      std::vector<int> iosite(nd);
 | 
			
		||||
 | 
			
		||||
      grid->CoorFromIndex(tsite,tlex,range);
 | 
			
		||||
      Lexicographic::CoorFromIndex(tsite,tlex,range);
 | 
			
		||||
 | 
			
		||||
      for(int d=0;d<nd;d++){
 | 
			
		||||
	lsite[d] = tsite[d]%grid->_ldimensions[d];  // local site
 | 
			
		||||
@@ -473,13 +634,21 @@ class BinaryIO {
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      // iorank writes from the seek
 | 
			
		||||
      ////////////////////////////////
 | 
			
		||||
      if (myrank == iorank) {
 | 
			
		||||
      
 | 
			
		||||
      // Owner of data peeks it
 | 
			
		||||
      peekLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
 | 
			
		||||
	if ( rank != myrank ) {
 | 
			
		||||
	  grid->RecvFrom((void *)&siteObj,rank,sizeof(siteObj));
 | 
			
		||||
	} else { 
 | 
			
		||||
	  peekLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
      // Pair of nodes may need to do pt2pt send
 | 
			
		||||
      if ( rank != iorank ) { // comms is necessary
 | 
			
		||||
	if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
 | 
			
		||||
	  // Send to IOrank 
 | 
			
		||||
	  grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      grid->Barrier(); // necessary?
 | 
			
		||||
 | 
			
		||||
      if (myrank == iorank) {
 | 
			
		||||
	
 | 
			
		||||
	munge(siteObj,fileObj,csum);
 | 
			
		||||
 | 
			
		||||
@@ -490,17 +659,16 @@ class BinaryIO {
 | 
			
		||||
	
 | 
			
		||||
	fout.seekp(offset+g_idx*sizeof(fileObj));
 | 
			
		||||
	fout.write((char *)&fileObj,sizeof(fileObj));
 | 
			
		||||
 | 
			
		||||
      } else { 
 | 
			
		||||
	if ( myrank == rank ) {
 | 
			
		||||
	  peekLocalSite(siteObj,Umu,lsite);
 | 
			
		||||
	  grid->SendTo((void *)&siteObj,iorank,sizeof(siteObj));
 | 
			
		||||
	} 
 | 
			
		||||
	bytes+=sizeof(fileObj);
 | 
			
		||||
      }
 | 
			
		||||
      grid->Barrier(); // necessary// or every 16 packets to rate throttle??
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    grid->GlobalSum(csum);
 | 
			
		||||
    grid->GlobalSum(bytes);
 | 
			
		||||
 | 
			
		||||
    timer.Stop();
 | 
			
		||||
    std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
 | 
			
		||||
	     << (double)bytes/timer.useconds() <<" MB/s "  <<std::endl;
 | 
			
		||||
 | 
			
		||||
    return csum;
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,32 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/parallelIO/NerscIO.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Matt Spraggs <matthew.spraggs@gmail.com>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_NERSC_IO_H
 | 
			
		||||
#define GRID_NERSC_IO_H
 | 
			
		||||
 | 
			
		||||
@@ -51,7 +80,6 @@ class NerscField {
 | 
			
		||||
    std::string floating_point;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Bit and Physical Checksumming and QA of data
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -69,8 +97,9 @@ inline void NerscGrid(GridBase *grid,NerscField &header)
 | 
			
		||||
template<class GaugeField>
 | 
			
		||||
inline void NerscStatistics(GaugeField & data,NerscField &header)
 | 
			
		||||
{
 | 
			
		||||
  header.link_trace=Grid::QCD::WilsonLoops<GaugeField>::linkTrace(data);
 | 
			
		||||
  header.plaquette =Grid::QCD::WilsonLoops<GaugeField>::avgPlaquette(data);
 | 
			
		||||
  // How to convert data precision etc...
 | 
			
		||||
  header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data);
 | 
			
		||||
  header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
inline void NerscMachineCharacteristics(NerscField &header)
 | 
			
		||||
@@ -181,38 +210,41 @@ inline void NerscMachineCharacteristics(NerscField &header)
 | 
			
		||||
class NerscIO : public BinaryIO { 
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  static inline void truncate(std::string file){
 | 
			
		||||
    std::ofstream fout(file,std::ios::out);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  #define dump_nersc_header(field, s)\
 | 
			
		||||
  s << "BEGIN_HEADER"      << std::endl;\
 | 
			
		||||
  s << "HDR_VERSION = "    << field.hdr_version    << std::endl;\
 | 
			
		||||
  s << "DATATYPE = "       << field.data_type      << std::endl;\
 | 
			
		||||
  s << "STORAGE_FORMAT = " << field.storage_format << std::endl;\
 | 
			
		||||
  for(int i=0;i<4;i++){\
 | 
			
		||||
    s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;\
 | 
			
		||||
  }\
 | 
			
		||||
  s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;\
 | 
			
		||||
  s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl;\
 | 
			
		||||
  for(int i=0;i<4;i++){\
 | 
			
		||||
    s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;\
 | 
			
		||||
  }\
 | 
			
		||||
  \
 | 
			
		||||
  s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl;\
 | 
			
		||||
  s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;\
 | 
			
		||||
  s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;\
 | 
			
		||||
  s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;\
 | 
			
		||||
  s << "CREATOR = "         << field.creator          << std::endl;\
 | 
			
		||||
  s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;\
 | 
			
		||||
  s << "CREATION_DATE = "   << field.creation_date    << std::endl;\
 | 
			
		||||
  s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;\
 | 
			
		||||
  s << "FLOATING_POINT = "  << field.floating_point   << std::endl;\
 | 
			
		||||
  s << "END_HEADER"         << std::endl;
 | 
			
		||||
  
 | 
			
		||||
  static inline unsigned int writeHeader(NerscField &field,std::string file)
 | 
			
		||||
  {
 | 
			
		||||
    std::ofstream fout(file,std::ios::out);
 | 
			
		||||
    std::ofstream fout(file,std::ios::out|std::ios::in);
 | 
			
		||||
  
 | 
			
		||||
    fout.seekp(0,std::ios::beg);
 | 
			
		||||
    fout << "BEGIN_HEADER"      << std::endl;
 | 
			
		||||
    fout << "HDR_VERSION = "    << field.hdr_version    << std::endl;
 | 
			
		||||
    fout << "DATATYPE = "       << field.data_type      << std::endl;
 | 
			
		||||
    fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<4;i++){
 | 
			
		||||
      fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
 | 
			
		||||
    }
 | 
			
		||||
    // just to keep the space and write it later
 | 
			
		||||
    fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
 | 
			
		||||
    fout << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl;
 | 
			
		||||
    for(int i=0;i<4;i++){
 | 
			
		||||
      fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fout << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::endl;
 | 
			
		||||
    fout << std::dec;
 | 
			
		||||
 | 
			
		||||
    fout << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;
 | 
			
		||||
    fout << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;
 | 
			
		||||
    fout << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;
 | 
			
		||||
    fout << "CREATOR = "         << field.creator          << std::endl;
 | 
			
		||||
    fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
 | 
			
		||||
    fout << "CREATION_DATE = "   << field.creation_date    << std::endl;
 | 
			
		||||
    fout << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;
 | 
			
		||||
    fout << "FLOATING_POINT = "  << field.floating_point   << std::endl;
 | 
			
		||||
    fout << "END_HEADER"         << std::endl;
 | 
			
		||||
    dump_nersc_header(field, fout);
 | 
			
		||||
    field.data_start = fout.tellp();
 | 
			
		||||
    return field.data_start;
 | 
			
		||||
}
 | 
			
		||||
@@ -287,9 +319,6 @@ static inline int readHeader(std::string file,GridBase *grid,  NerscField &field
 | 
			
		||||
  return field.data_start;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Now the meat: the object readers
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -317,17 +346,17 @@ static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
 | 
			
		||||
  if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
 | 
			
		||||
    if ( ieee32 || ieee32big ) {
 | 
			
		||||
      //      csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F> 
 | 
			
		||||
      csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F> 
 | 
			
		||||
	csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F> 
 | 
			
		||||
	(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
 | 
			
		||||
    }
 | 
			
		||||
    if ( ieee64 || ieee64big ) {
 | 
			
		||||
      //      csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D> 
 | 
			
		||||
      //csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D> 
 | 
			
		||||
      csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D> 
 | 
			
		||||
	(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
 | 
			
		||||
      	(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
 | 
			
		||||
    }
 | 
			
		||||
  } else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) {
 | 
			
		||||
  } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
 | 
			
		||||
    if ( ieee32 || ieee32big ) {
 | 
			
		||||
      //      csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
 | 
			
		||||
      //csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
 | 
			
		||||
      csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
 | 
			
		||||
	(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
 | 
			
		||||
    }
 | 
			
		||||
@@ -344,6 +373,7 @@ static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
 | 
			
		||||
 | 
			
		||||
  assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
 | 
			
		||||
  assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
 | 
			
		||||
 | 
			
		||||
  assert(csum == header.checksum );
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"Read NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
 | 
			
		||||
@@ -377,6 +407,8 @@ static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu
 | 
			
		||||
  uint32_t csum;
 | 
			
		||||
  int offset;
 | 
			
		||||
  
 | 
			
		||||
  truncate(file);
 | 
			
		||||
 | 
			
		||||
  if ( two_row ) { 
 | 
			
		||||
 | 
			
		||||
    header.floating_point = std::string("IEEE64BIG");
 | 
			
		||||
@@ -389,28 +421,105 @@ static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu
 | 
			
		||||
    std::string file1 = file+"para";
 | 
			
		||||
    int offset1 = writeHeader(header,file1);
 | 
			
		||||
    int csum1=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file1,munge,offset,header.floating_point);
 | 
			
		||||
    //int csum1=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file1,munge,offset,header.floating_point);
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    std::cout << GridLogMessage << " TESTING PARALLEL WRITE offsets " << offset1 << " "<< offset << std::endl;
 | 
			
		||||
    std::cout << GridLogMessage <<std::hex<< " TESTING PARALLEL WRITE csums   " << csum1 << " "<< csum << std::endl;
 | 
			
		||||
    std::cout << std::dec;
 | 
			
		||||
    std::cout << GridLogMessage << " TESTING PARALLEL WRITE csums   " << csum1 << " "<<std::hex<< csum << std::dec<< std::endl;
 | 
			
		||||
 | 
			
		||||
    assert(offset1==offset);  
 | 
			
		||||
    assert(csum1==csum);  
 | 
			
		||||
 | 
			
		||||
  } else { 
 | 
			
		||||
    header.floating_point = std::string("IEEE64BIG");
 | 
			
		||||
    header.data_type      = std::string("4D_SU3_GAUGE_3X3");
 | 
			
		||||
    header.data_type      = std::string("4D_SU3_GAUGE_3x3");
 | 
			
		||||
    NerscSimpleUnmunger<fobj3D,sobj> munge;
 | 
			
		||||
    BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
 | 
			
		||||
    offset = writeHeader(header,file);
 | 
			
		||||
    csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
 | 
			
		||||
    //    csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
 | 
			
		||||
    csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
 | 
			
		||||
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////
 | 
			
		||||
    // RNG state
 | 
			
		||||
    ///////////////////////////////
 | 
			
		||||
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename GridParallelRNG::RngStateType RngStateType;
 | 
			
		||||
 | 
			
		||||
  // Following should become arguments
 | 
			
		||||
  NerscField header;
 | 
			
		||||
  header.sequence_number = 1;
 | 
			
		||||
  header.ensemble_id     = "UKQCD";
 | 
			
		||||
  header.ensemble_label  = "DWF";
 | 
			
		||||
 | 
			
		||||
  GridBase *grid = parallel._grid;
 | 
			
		||||
 | 
			
		||||
  NerscGrid(grid,header);
 | 
			
		||||
  header.link_trace=0.0;
 | 
			
		||||
  header.plaquette=0.0;
 | 
			
		||||
  NerscMachineCharacteristics(header);
 | 
			
		||||
 | 
			
		||||
  uint32_t csum;
 | 
			
		||||
  int offset;
 | 
			
		||||
  
 | 
			
		||||
#ifdef RNG_RANLUX
 | 
			
		||||
    header.floating_point = std::string("UINT64");
 | 
			
		||||
    header.data_type      = std::string("RANLUX48");
 | 
			
		||||
#else
 | 
			
		||||
    header.floating_point = std::string("UINT32");
 | 
			
		||||
    header.data_type      = std::string("MT19937");
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  truncate(file);
 | 
			
		||||
  offset = writeHeader(header,file);
 | 
			
		||||
  csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset);
 | 
			
		||||
  header.checksum = csum;
 | 
			
		||||
  offset = writeHeader(header,file);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl;
 | 
			
		||||
 | 
			
		||||
 }
 | 
			
		||||
    
 | 
			
		||||
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename GridParallelRNG::RngStateType RngStateType;
 | 
			
		||||
 | 
			
		||||
  GridBase *grid = parallel._grid;
 | 
			
		||||
 | 
			
		||||
  int offset = readHeader(file,grid,header);
 | 
			
		||||
 | 
			
		||||
  NerscField clone(header);
 | 
			
		||||
 | 
			
		||||
  std::string format(header.floating_point);
 | 
			
		||||
  std::string data_type(header.data_type);
 | 
			
		||||
 | 
			
		||||
#ifdef RNG_RANLUX
 | 
			
		||||
  assert(format == std::string("UINT64"));
 | 
			
		||||
  assert(data_type == std::string("RANLUX48"));
 | 
			
		||||
#else
 | 
			
		||||
  assert(format == std::string("UINT32"));
 | 
			
		||||
  assert(data_type == std::string("MT19937"));
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  // depending on datatype, set up munger;
 | 
			
		||||
  // munger is a function of <floating point, Real, data_type>
 | 
			
		||||
  uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset);
 | 
			
		||||
 | 
			
		||||
  std::cerr<<" Csum "<< csum << " "<< header.checksum <<std::endl;
 | 
			
		||||
 | 
			
		||||
  assert(csum == header.checksum );
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user