mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Compare commits
	
		
			136 Commits
		
	
	
		
			249c7aee39
			...
			feature/gp
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					da59379612 | ||
| 
						 | 
					3ef2a41518 | ||
| 
						 | 
					aa96f420c6 | ||
| 
						 | 
					49e9e4ed0e | ||
| 
						 | 
					f7b8163016 | ||
| 
						 | 
					93769eacd3 | ||
| 
						 | 
					59b0cc11df | ||
| 
						 | 
					f32c275376 | ||
| 
						 | 
					5404fc66ab | ||
| 
						 | 
					1f53458af8 | ||
| 
						 | 
					434c3e7f1d | ||
| 
						 | 
					500b119f3d | ||
| 
						 | 
					4b87259c1b | ||
| 
						 | 
					503dec34ef | ||
| 
						 | 
					d1e9fe50d2 | ||
| 
						 | 
					d01e5fa838 | ||
| 
						 | 
					a477c25e8c | ||
| 
						 | 
					1bd20cd9e8 | ||
| 
						 | 
					e49e95b037 | ||
| 
						 | 
					6f59fed563 | ||
| 
						 | 
					60b7f6c99d | ||
| 
						 | 
					b92dfcc8d3 | ||
| 
						 | 
					f6fd6dd053 | ||
| 
						 | 
					79ad567dd5 | ||
| 
						 | 
					fab1efb48c | ||
| 
						 | 
					660eb76d93 | ||
| 
						 | 
					62e7bf024a | ||
| 
						 | 
					95f3d69cf9 | ||
| 89c0519f83 | |||
| 2704b82084 | |||
| cf8632bbac | |||
| d224297972 | |||
| 
						 | 
					a4d11a630f | ||
| 2b4399f8b1 | |||
| f17b8de907 | |||
| 
						 | 
					7e5bd46dd3 | ||
| 
						 | 
					228bbb9d81 | ||
| b812a7b4c6 | |||
| 891a366f73 | |||
| 10116b3be8 | |||
| a46a0f0882 | |||
| a26a8a38f4 | |||
| 7435315d50 | |||
| 9b5f741e85 | |||
| 517822fdd2 | |||
| 1b93a9be88 | |||
| 783a66b348 | |||
| 976c3e9b59 | |||
| f8ca971dae | |||
| 21bc8c24df | |||
| 30228214f7 | |||
| 
						 | 
					2ae980ae43 | ||
| 
						 | 
					6153dec2e4 | ||
| 
						 | 
					c805f86343 | ||
| 
						 | 
					04ca065281 | ||
| 
						 | 
					88d8fa43d7 | ||
| 
						 | 
					3c49762875 | ||
| 
						 | 
					436bf1d9d3 | ||
| 
						 | 
					f70df6e195 | ||
| 
						 | 
					fce3852dff | ||
| 
						 | 
					ee1b8bbdbd | ||
| 
						 | 
					3f1636637d | ||
| 
						 | 
					3c9012676a | ||
| 
						 | 
					b507fe209c | ||
| 
						 | 
					6cd2d8fcd5 | ||
| 
						 | 
					b02d022993 | ||
| 
						 | 
					94581e3c7a | ||
| 
						 | 
					88b52cc045 | ||
| 
						 | 
					0a816b5509 | ||
| 
						 | 
					1c8b807c2e | ||
| 97f7a9ecb3 | |||
| 
						 | 
					15878f7613 | ||
| 
						 | 
					e0d5e3c6c7 | ||
| 
						 | 
					6f3455900e | ||
| 
						 | 
					56827d6ad6 | ||
| e4a641b64e | |||
| 8849f187f1 | |||
| 
						 | 
					db420525b3 | ||
| 
						 | 
					b5659d106e | ||
| 
						 | 
					4b43307402 | ||
| 
						 | 
					09af8c25a2 | ||
| 
						 | 
					9514035b87 | ||
| 
						 | 
					2da09ae99b | ||
| 
						 | 
					a38fb0e04a | ||
| 
						 | 
					1514b4f137 | ||
| 
						 | 
					0a6e2f42c5 | ||
| 
						 | 
					ab2de131bd | ||
| 
						 | 
					5af8da76d7 | ||
| 
						 | 
					b8b9dc952d | ||
| 
						 | 
					79a6ed32d8 | ||
| 
						 | 
					caa5f97723 | ||
| 
						 | 
					4924b3209e | ||
| 
						 | 
					00f24f8765 | ||
| 
						 | 
					f5b3d582b0 | ||
| 
						 | 
					981c93d67a | ||
| 
						 | 
					c020b78e02 | ||
| 
						 | 
					9cd4128833 | ||
| 
						 | 
					c8b17c9526 | ||
| 
						 | 
					2ae2a81e85 | ||
| 
						 | 
					69c869d345 | ||
| 
						 | 
					df9b958c40 | ||
| 
						 | 
					3d3376d1a3 | ||
| 
						 | 
					21ed6ac0f4 | ||
| 
						 | 
					7bb8ab7000 | ||
| 
						 | 
					2c824c2641 | ||
| 
						 | 
					391fd9cc6a | ||
| 
						 | 
					bf4369f72d | ||
| 
						 | 
					36600899e2 | ||
| 
						 | 
					b9c70d156b | ||
| 
						 | 
					eb89579fe7 | ||
| 
						 | 
					0cfd13d18b | ||
| 
						 | 
					63d9b8e8a3 | ||
| 
						 | 
					d247031c98 | ||
| 
						 | 
					affff3865f | ||
| 
						 | 
					9c22655b5a | ||
| 
						 | 
					99d879ea7f | ||
| 
						 | 
					9d263d9a7d | ||
| 
						 | 
					9015c229dc | ||
| 
						 | 
					a7eabaad56 | ||
| 
						 | 
					eeb4703b84 | ||
| 
						 | 
					a07421b3d3 | ||
| 
						 | 
					cda53b4068 | ||
| 
						 | 
					df99f227c1 | ||
| 
						 | 
					d536c67b9d | ||
| 
						 | 
					f44f005dad | ||
| 
						 | 
					26b2caf570 | ||
| 
						 | 
					8bb078db25 | ||
| 
						 | 
					b61ba40023 | ||
| 
						 | 
					14d352ea4f | ||
| 
						 | 
					1cf9ec1cce | ||
| 
						 | 
					4b994a1bc7 | ||
| 
						 | 
					e506d6d369 | ||
| 
						 | 
					ab56ad8d7a | ||
| 
						 | 
					3825329f8e | ||
| 
						 | 
					c7bdf2c0e4 | ||
| 
						 | 
					bf91778550 | 
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -1,3 +1,7 @@
 | 
				
			|||||||
 | 
					# Doxygen stuff
 | 
				
			||||||
 | 
					html/*
 | 
				
			||||||
 | 
					latex/*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Compiled Object files #
 | 
					# Compiled Object files #
 | 
				
			||||||
#########################
 | 
					#########################
 | 
				
			||||||
*.slo
 | 
					*.slo
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,7 +34,7 @@
 | 
				
			|||||||
#pragma push_macro("__SYCL_DEVICE_ONLY__")
 | 
					#pragma push_macro("__SYCL_DEVICE_ONLY__")
 | 
				
			||||||
#undef __SYCL_DEVICE_ONLY__
 | 
					#undef __SYCL_DEVICE_ONLY__
 | 
				
			||||||
#define EIGEN_DONT_VECTORIZE
 | 
					#define EIGEN_DONT_VECTORIZE
 | 
				
			||||||
//#undef EIGEN_USE_SYCL
 | 
					#undef EIGEN_USE_SYCL
 | 
				
			||||||
#define __SYCL__REDEFINE__
 | 
					#define __SYCL__REDEFINE__
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#define _GRID_FFT_H_
 | 
					#define _GRID_FFT_H_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef HAVE_FFTW
 | 
					#ifdef HAVE_FFTW
 | 
				
			||||||
#ifdef USE_MKL
 | 
					#if defined(USE_MKL) || defined(GRID_SYCL)
 | 
				
			||||||
#include <fftw/fftw3.h>
 | 
					#include <fftw/fftw3.h>
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#include <fftw3.h>
 | 
					#include <fftw3.h>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -293,7 +293,7 @@ static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k,
 | 
				
			|||||||
 * Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
 | 
					 * Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
 | 
				
			||||||
 * type = 1 for the approximation which is infinite at x = 0. */
 | 
					 * type = 1 for the approximation which is infinite at x = 0. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
zolotarev_data* zolotarev(PRECISION epsilon, int n, int type) {
 | 
					zolotarev_data* zolotarev(ZOLO_PRECISION epsilon, int n, int type) {
 | 
				
			||||||
  INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
 | 
					  INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
 | 
				
			||||||
    l, invlambda, xi, xisq, *tv, s, opl;
 | 
					    l, invlambda, xi, xisq, *tv, s, opl;
 | 
				
			||||||
  int m, czero, ts;
 | 
					  int m, czero, ts;
 | 
				
			||||||
@@ -375,12 +375,12 @@ zolotarev_data* zolotarev(PRECISION epsilon, int n, int type) {
 | 
				
			|||||||
  construct_partfrac(d);
 | 
					  construct_partfrac(d);
 | 
				
			||||||
  construct_contfrac(d);
 | 
					  construct_contfrac(d);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /* Converting everything to PRECISION for external use only */
 | 
					  /* Converting everything to ZOLO_PRECISION for external use only */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
 | 
					  zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
 | 
				
			||||||
  zd -> A = (PRECISION) d -> A;
 | 
					  zd -> A = (ZOLO_PRECISION) d -> A;
 | 
				
			||||||
  zd -> Delta = (PRECISION) d -> Delta;
 | 
					  zd -> Delta = (ZOLO_PRECISION) d -> Delta;
 | 
				
			||||||
  zd -> epsilon = (PRECISION) d -> epsilon;
 | 
					  zd -> epsilon = (ZOLO_PRECISION) d -> epsilon;
 | 
				
			||||||
  zd -> n = d -> n;
 | 
					  zd -> n = d -> n;
 | 
				
			||||||
  zd -> type = d -> type;
 | 
					  zd -> type = d -> type;
 | 
				
			||||||
  zd -> dn = d -> dn;
 | 
					  zd -> dn = d -> dn;
 | 
				
			||||||
@@ -390,24 +390,24 @@ zolotarev_data* zolotarev(PRECISION epsilon, int n, int type) {
 | 
				
			|||||||
  zd -> deg_num = d -> deg_num;
 | 
					  zd -> deg_num = d -> deg_num;
 | 
				
			||||||
  zd -> deg_denom = d -> deg_denom;
 | 
					  zd -> deg_denom = d -> deg_denom;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
 | 
					  zd -> a = (ZOLO_PRECISION*) malloc(zd -> dn * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
 | 
					  for (m = 0; m < zd -> dn; m++) zd -> a[m] = (ZOLO_PRECISION) d -> a[m];
 | 
				
			||||||
  free(d -> a);
 | 
					  free(d -> a);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
 | 
					  zd -> ap = (ZOLO_PRECISION*) malloc(zd -> dd * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
 | 
					  for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (ZOLO_PRECISION) d -> ap[m];
 | 
				
			||||||
  free(d -> ap);
 | 
					  free(d -> ap);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
 | 
					  zd -> alpha = (ZOLO_PRECISION*) malloc(zd -> da * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
 | 
					  for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (ZOLO_PRECISION) d -> alpha[m];
 | 
				
			||||||
  free(d -> alpha);
 | 
					  free(d -> alpha);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
 | 
					  zd -> beta = (ZOLO_PRECISION*) malloc(zd -> db * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
 | 
					  for (m = 0; m < zd -> db; m++) zd -> beta[m] = (ZOLO_PRECISION) d -> beta[m];
 | 
				
			||||||
  free(d -> beta);
 | 
					  free(d -> beta);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
 | 
					  zd -> gamma = (ZOLO_PRECISION*) malloc(zd -> n * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
 | 
					  for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (ZOLO_PRECISION) d -> gamma[m];
 | 
				
			||||||
  free(d -> gamma);
 | 
					  free(d -> gamma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  free(d);
 | 
					  free(d);
 | 
				
			||||||
@@ -426,7 +426,7 @@ void zolotarev_free(zolotarev_data *zdata)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
zolotarev_data* higham(PRECISION epsilon, int n) {
 | 
					zolotarev_data* higham(ZOLO_PRECISION epsilon, int n) {
 | 
				
			||||||
  INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
 | 
					  INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
 | 
				
			||||||
  int m, czero;
 | 
					  int m, czero;
 | 
				
			||||||
  zolotarev_data *zd;
 | 
					  zolotarev_data *zd;
 | 
				
			||||||
@@ -481,9 +481,9 @@ zolotarev_data* higham(PRECISION epsilon, int n) {
 | 
				
			|||||||
  /* Converting everything to PRECISION for external use only */
 | 
					  /* Converting everything to PRECISION for external use only */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
 | 
					  zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
 | 
				
			||||||
  zd -> A = (PRECISION) d -> A;
 | 
					  zd -> A = (ZOLO_PRECISION) d -> A;
 | 
				
			||||||
  zd -> Delta = (PRECISION) d -> Delta;
 | 
					  zd -> Delta = (ZOLO_PRECISION) d -> Delta;
 | 
				
			||||||
  zd -> epsilon = (PRECISION) d -> epsilon;
 | 
					  zd -> epsilon = (ZOLO_PRECISION) d -> epsilon;
 | 
				
			||||||
  zd -> n = d -> n;
 | 
					  zd -> n = d -> n;
 | 
				
			||||||
  zd -> type = d -> type;
 | 
					  zd -> type = d -> type;
 | 
				
			||||||
  zd -> dn = d -> dn;
 | 
					  zd -> dn = d -> dn;
 | 
				
			||||||
@@ -493,24 +493,24 @@ zolotarev_data* higham(PRECISION epsilon, int n) {
 | 
				
			|||||||
  zd -> deg_num = d -> deg_num;
 | 
					  zd -> deg_num = d -> deg_num;
 | 
				
			||||||
  zd -> deg_denom = d -> deg_denom;
 | 
					  zd -> deg_denom = d -> deg_denom;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
 | 
					  zd -> a = (ZOLO_PRECISION*) malloc(zd -> dn * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
 | 
					  for (m = 0; m < zd -> dn; m++) zd -> a[m] = (ZOLO_PRECISION) d -> a[m];
 | 
				
			||||||
  free(d -> a);
 | 
					  free(d -> a);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
 | 
					  zd -> ap = (ZOLO_PRECISION*) malloc(zd -> dd * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
 | 
					  for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (ZOLO_PRECISION) d -> ap[m];
 | 
				
			||||||
  free(d -> ap);
 | 
					  free(d -> ap);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
 | 
					  zd -> alpha = (ZOLO_PRECISION*) malloc(zd -> da * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
 | 
					  for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (ZOLO_PRECISION) d -> alpha[m];
 | 
				
			||||||
  free(d -> alpha);
 | 
					  free(d -> alpha);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
 | 
					  zd -> beta = (ZOLO_PRECISION*) malloc(zd -> db * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
 | 
					  for (m = 0; m < zd -> db; m++) zd -> beta[m] = (ZOLO_PRECISION) d -> beta[m];
 | 
				
			||||||
  free(d -> beta);
 | 
					  free(d -> beta);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
 | 
					  zd -> gamma = (ZOLO_PRECISION*) malloc(zd -> n * sizeof(ZOLO_PRECISION));
 | 
				
			||||||
  for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
 | 
					  for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (ZOLO_PRECISION) d -> gamma[m];
 | 
				
			||||||
  free(d -> gamma);
 | 
					  free(d -> gamma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  free(d);
 | 
					  free(d);
 | 
				
			||||||
@@ -523,17 +523,17 @@ NAMESPACE_END(Grid);
 | 
				
			|||||||
#ifdef TEST
 | 
					#ifdef TEST
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#undef ZERO
 | 
					#undef ZERO
 | 
				
			||||||
#define ZERO ((PRECISION) 0)
 | 
					#define ZERO ((ZOLO_PRECISION) 0)
 | 
				
			||||||
#undef ONE
 | 
					#undef ONE
 | 
				
			||||||
#define ONE ((PRECISION) 1)
 | 
					#define ONE ((ZOLO_PRECISION) 1)
 | 
				
			||||||
#undef TWO
 | 
					#undef TWO
 | 
				
			||||||
#define TWO ((PRECISION) 2)
 | 
					#define TWO ((ZOLO_PRECISION) 2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Evaluate the rational approximation R(x) using the factored form */
 | 
					/* Evaluate the rational approximation R(x) using the factored form */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PRECISION zolotarev_eval(PRECISION x, zolotarev_data* rdata) {
 | 
					static ZOLO_PRECISION zolotarev_eval(ZOLO_PRECISION x, zolotarev_data* rdata) {
 | 
				
			||||||
  int m;
 | 
					  int m;
 | 
				
			||||||
  PRECISION R;
 | 
					  ZOLO_PRECISION R;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (rdata -> type == 0) {
 | 
					  if (rdata -> type == 0) {
 | 
				
			||||||
    R = rdata -> A * x;
 | 
					    R = rdata -> A * x;
 | 
				
			||||||
@@ -551,9 +551,9 @@ static PRECISION zolotarev_eval(PRECISION x, zolotarev_data* rdata) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
/* Evaluate the rational approximation R(x) using the partial fraction form */
 | 
					/* Evaluate the rational approximation R(x) using the partial fraction form */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PRECISION zolotarev_partfrac_eval(PRECISION x, zolotarev_data* rdata) {
 | 
					static ZOLO_PRECISION zolotarev_partfrac_eval(ZOLO_PRECISION x, zolotarev_data* rdata) {
 | 
				
			||||||
  int m;
 | 
					  int m;
 | 
				
			||||||
  PRECISION R = rdata -> alpha[rdata -> da - 1];
 | 
					  ZOLO_PRECISION R = rdata -> alpha[rdata -> da - 1];
 | 
				
			||||||
  for (m = 0; m < rdata -> dd; m++)
 | 
					  for (m = 0; m < rdata -> dd; m++)
 | 
				
			||||||
    R += rdata -> alpha[m] / (x * x - rdata -> ap[m]);
 | 
					    R += rdata -> alpha[m] / (x * x - rdata -> ap[m]);
 | 
				
			||||||
  if (rdata -> type == 1) R += rdata -> alpha[rdata -> dd] / (x * x);
 | 
					  if (rdata -> type == 1) R += rdata -> alpha[rdata -> dd] / (x * x);
 | 
				
			||||||
@@ -568,18 +568,18 @@ static PRECISION zolotarev_partfrac_eval(PRECISION x, zolotarev_data* rdata) {
 | 
				
			|||||||
 * non-signalling overflow this will work correctly since 1/(1/0) = 1/INF = 0,
 | 
					 * non-signalling overflow this will work correctly since 1/(1/0) = 1/INF = 0,
 | 
				
			||||||
 * but with signalling overflow you will get an error message. */
 | 
					 * but with signalling overflow you will get an error message. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PRECISION zolotarev_contfrac_eval(PRECISION x, zolotarev_data* rdata) {
 | 
					static ZOLO_PRECISION zolotarev_contfrac_eval(ZOLO_PRECISION x, zolotarev_data* rdata) {
 | 
				
			||||||
  int m;
 | 
					  int m;
 | 
				
			||||||
  PRECISION R = rdata -> beta[0] * x;
 | 
					  ZOLO_PRECISION R = rdata -> beta[0] * x;
 | 
				
			||||||
  for (m = 1; m < rdata -> db; m++) R = rdata -> beta[m] * x + ONE / R;
 | 
					  for (m = 1; m < rdata -> db; m++) R = rdata -> beta[m] * x + ONE / R;
 | 
				
			||||||
  return R;
 | 
					  return R;
 | 
				
			||||||
}    
 | 
					}    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Evaluate the rational approximation R(x) using Cayley form */
 | 
					/* Evaluate the rational approximation R(x) using Cayley form */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PRECISION zolotarev_cayley_eval(PRECISION x, zolotarev_data* rdata) {
 | 
					static ZOLO_PRECISION zolotarev_cayley_eval(ZOLO_PRECISION x, zolotarev_data* rdata) {
 | 
				
			||||||
  int m;
 | 
					  int m;
 | 
				
			||||||
  PRECISION T;
 | 
					  ZOLO_PRECISION T;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  T = rdata -> type == 0 ? ONE : -ONE;
 | 
					  T = rdata -> type == 0 ? ONE : -ONE;
 | 
				
			||||||
  for (m = 0; m < rdata -> n; m++)
 | 
					  for (m = 0; m < rdata -> n; m++)
 | 
				
			||||||
@@ -607,7 +607,7 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
  int m, n, plotpts = 5000, type = 0;
 | 
					  int m, n, plotpts = 5000, type = 0;
 | 
				
			||||||
  float eps, x, ypferr, ycferr, ycaylerr, maxypferr, maxycferr, maxycaylerr;
 | 
					  float eps, x, ypferr, ycferr, ycaylerr, maxypferr, maxycferr, maxycaylerr;
 | 
				
			||||||
  zolotarev_data *rdata;
 | 
					  zolotarev_data *rdata;
 | 
				
			||||||
  PRECISION y;
 | 
					  ZOLO_PRECISION y;
 | 
				
			||||||
  FILE *plot_function, *plot_error, 
 | 
					  FILE *plot_function, *plot_error, 
 | 
				
			||||||
    *plot_partfrac, *plot_contfrac, *plot_cayley;
 | 
					    *plot_partfrac, *plot_contfrac, *plot_cayley;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -626,13 +626,13 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  rdata = type == 2 
 | 
					  rdata = type == 2 
 | 
				
			||||||
    ? higham((PRECISION) eps, n) 
 | 
					    ? higham((ZOLO_PRECISION) eps, n) 
 | 
				
			||||||
    : zolotarev((PRECISION) eps, n, type);
 | 
					    : zolotarev((ZOLO_PRECISION) eps, n, type);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  printf("Zolotarev Test: R(epsilon = %g, n = %d, type = %d)\n\t" 
 | 
					  printf("Zolotarev Test: R(epsilon = %g, n = %d, type = %d)\n\t" 
 | 
				
			||||||
	 STRINGIFY(VERSION) "\n\t" STRINGIFY(HVERSION)
 | 
						 STRINGIFY(VERSION) "\n\t" STRINGIFY(HVERSION)
 | 
				
			||||||
	 "\n\tINTERNAL_PRECISION = " STRINGIFY(INTERNAL_PRECISION)
 | 
						 "\n\tINTERNAL_PRECISION = " STRINGIFY(INTERNAL_PRECISION)
 | 
				
			||||||
	 "\tPRECISION = " STRINGIFY(PRECISION)
 | 
						 "\tZOLO_PRECISION = " STRINGIFY(ZOLO_PRECISION)
 | 
				
			||||||
	 "\n\n\tRational approximation of degree (%d,%d), %s at x = 0\n"
 | 
						 "\n\n\tRational approximation of degree (%d,%d), %s at x = 0\n"
 | 
				
			||||||
	 "\tDelta = %g (maximum error)\n\n"
 | 
						 "\tDelta = %g (maximum error)\n\n"
 | 
				
			||||||
	 "\tA = %g (overall factor)\n",
 | 
						 "\tA = %g (overall factor)\n",
 | 
				
			||||||
@@ -681,15 +681,15 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
    x = 2.4 * (float) m / plotpts - 1.2;
 | 
					    x = 2.4 * (float) m / plotpts - 1.2;
 | 
				
			||||||
    if (rdata -> type == 0 || fabs(x) * (float) plotpts > 1.0) {
 | 
					    if (rdata -> type == 0 || fabs(x) * (float) plotpts > 1.0) {
 | 
				
			||||||
      /* skip x = 0 for type 1, as R(0) is singular */
 | 
					      /* skip x = 0 for type 1, as R(0) is singular */
 | 
				
			||||||
      y = zolotarev_eval((PRECISION) x, rdata);
 | 
					      y = zolotarev_eval((ZOLO_PRECISION) x, rdata);
 | 
				
			||||||
      fprintf(plot_function, "%g %g\n", x, (float) y);
 | 
					      fprintf(plot_function, "%g %g\n", x, (float) y);
 | 
				
			||||||
      fprintf(plot_error, "%g %g\n",
 | 
					      fprintf(plot_error, "%g %g\n",
 | 
				
			||||||
	      x, (float)((y - ((x > 0.0 ? ONE : -ONE))) / rdata -> Delta));
 | 
						      x, (float)((y - ((x > 0.0 ? ONE : -ONE))) / rdata -> Delta));
 | 
				
			||||||
      ypferr = (float)((zolotarev_partfrac_eval((PRECISION) x, rdata) - y)
 | 
					      ypferr = (float)((zolotarev_partfrac_eval((ZOLO_PRECISION) x, rdata) - y)
 | 
				
			||||||
		       / rdata -> Delta);
 | 
							       / rdata -> Delta);
 | 
				
			||||||
      ycferr = (float)((zolotarev_contfrac_eval((PRECISION) x, rdata) - y)
 | 
					      ycferr = (float)((zolotarev_contfrac_eval((ZOLO_PRECISION) x, rdata) - y)
 | 
				
			||||||
		       / rdata -> Delta);
 | 
							       / rdata -> Delta);
 | 
				
			||||||
      ycaylerr = (float)((zolotarev_cayley_eval((PRECISION) x, rdata) - y)
 | 
					      ycaylerr = (float)((zolotarev_cayley_eval((ZOLO_PRECISION) x, rdata) - y)
 | 
				
			||||||
		       / rdata -> Delta);
 | 
							       / rdata -> Delta);
 | 
				
			||||||
      if (fabs(x) < 1.0 && fabs(x) > rdata -> epsilon) {
 | 
					      if (fabs(x) < 1.0 && fabs(x) > rdata -> epsilon) {
 | 
				
			||||||
	maxypferr = MAX(maxypferr, fabs(ypferr));
 | 
						maxypferr = MAX(maxypferr, fabs(ypferr));
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,10 +9,10 @@ NAMESPACE_BEGIN(Approx);
 | 
				
			|||||||
#define HVERSION Header Time-stamp: <14-OCT-2004 09:26:51.00 adk@MISSCONTRARY>
 | 
					#define HVERSION Header Time-stamp: <14-OCT-2004 09:26:51.00 adk@MISSCONTRARY>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef ZOLOTAREV_INTERNAL
 | 
					#ifndef ZOLOTAREV_INTERNAL
 | 
				
			||||||
#ifndef PRECISION
 | 
					#ifndef ZOLO_PRECISION
 | 
				
			||||||
#define PRECISION double
 | 
					#define ZOLO_PRECISION double
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#define ZPRECISION PRECISION
 | 
					#define ZPRECISION ZOLO_PRECISION
 | 
				
			||||||
#define ZOLOTAREV_DATA zolotarev_data
 | 
					#define ZOLOTAREV_DATA zolotarev_data
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -77,8 +77,8 @@ typedef struct {
 | 
				
			|||||||
 * zolotarev_data structure. The arguments must satisfy the constraints that
 | 
					 * zolotarev_data structure. The arguments must satisfy the constraints that
 | 
				
			||||||
 * epsilon > 0, n > 0, and type = 0 or 1. */
 | 
					 * epsilon > 0, n > 0, and type = 0 or 1. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ZOLOTAREV_DATA* higham(PRECISION epsilon, int n) ;
 | 
					ZOLOTAREV_DATA* higham(ZOLO_PRECISION epsilon, int n) ;
 | 
				
			||||||
ZOLOTAREV_DATA* zolotarev(PRECISION epsilon, int n, int type);
 | 
					ZOLOTAREV_DATA* zolotarev(ZOLO_PRECISION epsilon, int n, int type);
 | 
				
			||||||
void zolotarev_free(zolotarev_data *zdata);
 | 
					void zolotarev_free(zolotarev_data *zdata);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -86,3 +86,4 @@ void zolotarev_free(zolotarev_data *zdata);
 | 
				
			|||||||
NAMESPACE_END(Approx);
 | 
					NAMESPACE_END(Approx);
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,12 +1,12 @@
 | 
				
			|||||||
    /*************************************************************************************
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Source file: ./tests/Test_rng.cc
 | 
					    Source file: BatchedBlas.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Copyright (C) 2015
 | 
					    Copyright (C) 2023
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
					Author: Peter Boyle <pboyle@bnl.gov>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    This program is free software; you can redistribute it and/or modify
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
    it under the terms of the GNU General Public License as published by
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
@@ -23,15 +23,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
    *************************************************************************************/
 | 
					*************************************************************************************/
 | 
				
			||||||
    /*  END LEGAL */
 | 
					/*  END LEGAL */
 | 
				
			||||||
#include <Grid/Grid.h>
 | 
					#include <Grid/GridCore.h>
 | 
				
			||||||
 | 
					#include <Grid/algorithms/blas/BatchedBlas.h>
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					gridblasHandle_t GridBLAS::gridblasHandle;
 | 
				
			||||||
 | 
					int              GridBLAS::gridblasInit;
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
using namespace std;
 | 
					 | 
				
			||||||
using namespace Grid;
 | 
					 | 
				
			||||||
 ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main (int argc, char ** argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  std::cout << "atif1 " << __FILE__ << ":" << __LINE__ << std::endl;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
							
								
								
									
										727
									
								
								Grid/algorithms/blas/BatchedBlas.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										727
									
								
								Grid/algorithms/blas/BatchedBlas.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,727 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: BatchedBlas.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2023
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <pboyle@bnl.gov>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
 | 
					#include <hipblas/hipblas.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					#include <cublas_v2.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					#include <oneapi/mkl.hpp>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#if 0
 | 
				
			||||||
 | 
					#define GRID_ONE_MKL
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_ONE_MKL
 | 
				
			||||||
 | 
					#include <oneapi/mkl.hpp>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////////////////////////	  
 | 
				
			||||||
 | 
					// Need to rearrange lattice data to be in the right format for a
 | 
				
			||||||
 | 
					// batched multiply. Might as well make these static, dense packed
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					// Backend handle type: one typedef per accelerator backend selected at
					// configure time, with a plain integer placeholder when none is enabled.
					#if defined(GRID_HIP)
					  typedef hipblasHandle_t gridblasHandle_t;
					#endif
					#if defined(GRID_CUDA)
					  typedef cublasHandle_t gridblasHandle_t;
					#endif
					#if defined(GRID_SYCL)
					  typedef cl::sycl::queue *gridblasHandle_t;
					#endif
					#if defined(GRID_ONE_MKL)
					  typedef cl::sycl::queue *gridblasHandle_t;
					#endif
					#if !(defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_ONE_MKL))
					  typedef int32_t gridblasHandle_t;
					#endif

					// Operation applied to a GEMM operand; mapped onto the backend's
					// OP_N / OP_T / OP_C constants inside gemmBatched.
					enum GridBLASOperation_t {
					  GridBLAS_OP_N,
					  GridBLAS_OP_T,
					  GridBLAS_OP_C
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class GridBLAS {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  static gridblasHandle_t gridblasHandle;
 | 
				
			||||||
 | 
					  static int            gridblasInit;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  static void Init(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    if ( ! gridblasInit ) {
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					      std::cout << "cublasCreate"<<std::endl;
 | 
				
			||||||
 | 
					      cublasCreate(&gridblasHandle);
 | 
				
			||||||
 | 
					      cublasSetPointerMode(gridblasHandle, CUBLAS_POINTER_MODE_DEVICE);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
 | 
					      std::cout << "hipblasCreate"<<std::endl;
 | 
				
			||||||
 | 
					      hipblasCreate(&gridblasHandle);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					      gridblasHandle = theGridAccelerator;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_ONE_MKL
 | 
				
			||||||
 | 
					      cl::sycl::cpu_selector selector;
 | 
				
			||||||
 | 
					      cl::sycl::device selectedDevice { selector };
 | 
				
			||||||
 | 
					      gridblasHandle =new sycl::queue (selectedDevice);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					      gridblasInit=1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // Force construct once
 | 
				
			||||||
 | 
					  GridBLAS() { Init(); };
 | 
				
			||||||
 | 
					  ~GridBLAS() { };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // BLAS GEMM conventions:
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // - C = alpha A * B + beta C
 | 
				
			||||||
 | 
					  // Dimensions:
 | 
				
			||||||
 | 
					  // - C_m.n
 | 
				
			||||||
 | 
					  // - A_m.k
 | 
				
			||||||
 | 
					  // - B_k.n
 | 
				
			||||||
 | 
					  // - Flops = 8 M N K
 | 
				
			||||||
 | 
					  // - Bytes = 2*sizeof(word) * (MN+MK+KN)
 | 
				
			||||||
 | 
					  // M=K=60, N=12
 | 
				
			||||||
 | 
					  // Flop/Byte = 8 . 60.60.12 / (60.12+60.60+60.12)/16 = 4 so expect about 4 TF/s on a GCD
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Invoke the active backend's device-wide synchronisation call and assert
					  // that it reported success. With no backend compiled in this does nothing.
					  void synchronise(void)
					  {
					#if defined(GRID_HIP)
					    auto hipStatus = hipDeviceSynchronize();
					    assert(hipSuccess==hipStatus);
					#endif
					#if defined(GRID_CUDA)
					    auto cudaStatus = cudaDeviceSynchronize();
					    assert(cudaSuccess==cudaStatus);
					#endif
					#if defined(GRID_SYCL)
					    accelerator_barrier();
					#endif
					#if defined(GRID_ONE_MKL)
					    gridblasHandle->wait();
					#endif
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  void gemmBatched(int m,int n, int k,
 | 
				
			||||||
 | 
							   ComplexD alpha,
 | 
				
			||||||
 | 
							   deviceVector<ComplexD*> &Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
							   deviceVector<ComplexD*> &Bkn,
 | 
				
			||||||
 | 
							   ComplexD beta,
 | 
				
			||||||
 | 
							   deviceVector<ComplexD*> &Cmn)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    gemmBatched(GridBLAS_OP_N,GridBLAS_OP_N,
 | 
				
			||||||
 | 
							m,n,k,
 | 
				
			||||||
 | 
							alpha,
 | 
				
			||||||
 | 
							Amk,
 | 
				
			||||||
 | 
							Bkn,
 | 
				
			||||||
 | 
							beta,
 | 
				
			||||||
 | 
							Cmn);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void gemmBatched(int m,int n, int k,
 | 
				
			||||||
 | 
							   ComplexF alpha,
 | 
				
			||||||
 | 
							   deviceVector<ComplexF*> &Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
							   deviceVector<ComplexF*> &Bkn,
 | 
				
			||||||
 | 
							   ComplexF beta,
 | 
				
			||||||
 | 
							   deviceVector<ComplexF*> &Cmn)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    gemmBatched(GridBLAS_OP_N,GridBLAS_OP_N,
 | 
				
			||||||
 | 
							m,n,k,
 | 
				
			||||||
 | 
							alpha,
 | 
				
			||||||
 | 
							Amk,
 | 
				
			||||||
 | 
							Bkn,
 | 
				
			||||||
 | 
							beta,
 | 
				
			||||||
 | 
							Cmn);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void gemmBatched(int m,int n, int k,
 | 
				
			||||||
 | 
							   RealD alpha,
 | 
				
			||||||
 | 
							   deviceVector<RealD*> &Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
							   deviceVector<RealD*> &Bkn,
 | 
				
			||||||
 | 
							   RealD beta,
 | 
				
			||||||
 | 
							   deviceVector<RealD*> &Cmn)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    gemmBatched(GridBLAS_OP_N,GridBLAS_OP_N,
 | 
				
			||||||
 | 
							m,n,k,
 | 
				
			||||||
 | 
							alpha,
 | 
				
			||||||
 | 
							Amk,
 | 
				
			||||||
 | 
							Bkn,
 | 
				
			||||||
 | 
							beta,
 | 
				
			||||||
 | 
							Cmn);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void gemmBatched(int m,int n, int k,
 | 
				
			||||||
 | 
							   RealF alpha,
 | 
				
			||||||
 | 
							   deviceVector<RealF*> &Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
							   deviceVector<RealF*> &Bkn,
 | 
				
			||||||
 | 
							   RealF beta,
 | 
				
			||||||
 | 
							   deviceVector<RealF*> &Cmn)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    gemmBatched(GridBLAS_OP_N,GridBLAS_OP_N,
 | 
				
			||||||
 | 
							m,n,k,
 | 
				
			||||||
 | 
							alpha,
 | 
				
			||||||
 | 
							Amk,
 | 
				
			||||||
 | 
							Bkn,
 | 
				
			||||||
 | 
							beta,
 | 
				
			||||||
 | 
							Cmn);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void gemmBatched(GridBLASOperation_t OpA,
 | 
				
			||||||
 | 
							   GridBLASOperation_t OpB,
 | 
				
			||||||
 | 
							   int m,int n, int k,
 | 
				
			||||||
 | 
							   ComplexD alpha,
 | 
				
			||||||
 | 
							   deviceVector<ComplexD*> &Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
							   deviceVector<ComplexD*> &Bkn,
 | 
				
			||||||
 | 
							   ComplexD beta,
 | 
				
			||||||
 | 
							   deviceVector<ComplexD*> &Cmn)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    RealD t2=usecond();
 | 
				
			||||||
 | 
					    int32_t batchCount = Amk.size();
 | 
				
			||||||
 | 
					    assert(Bkn.size()==batchCount);
 | 
				
			||||||
 | 
					    assert(Cmn.size()==batchCount);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int lda = m; // m x k column major
 | 
				
			||||||
 | 
					    int ldb = k; // k x n column major
 | 
				
			||||||
 | 
					    int ldc = m; // m x b column major
 | 
				
			||||||
 | 
					    if(OpA!=GridBLAS_OP_N)
 | 
				
			||||||
 | 
					      lda = k;
 | 
				
			||||||
 | 
					    if(OpB!=GridBLAS_OP_N)
 | 
				
			||||||
 | 
					      ldb = n;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    static deviceVector<ComplexD> alpha_p(1);
 | 
				
			||||||
 | 
					    static deviceVector<ComplexD> beta_p(1);
 | 
				
			||||||
 | 
					    // can prestore the 1 and the zero on device
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&alpha,(void *)&alpha_p[0],sizeof(ComplexD));
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&beta ,(void *)&beta_p[0],sizeof(ComplexD));
 | 
				
			||||||
 | 
					    RealD t0=usecond();
 | 
				
			||||||
 | 
					    //    std::cout << "ZgemmBatched mnk  "<<m<<","<<n<<","<<k<<" count "<<batchCount<<std::endl;
 | 
				
			||||||
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
 | 
					    hipblasOperation_t hOpA;
 | 
				
			||||||
 | 
					    hipblasOperation_t hOpB;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_N ) hOpA = HIPBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_T ) hOpA = HIPBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_C ) hOpA = HIPBLAS_OP_C;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_N ) hOpB = HIPBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_T ) hOpB = HIPBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_C ) hOpB = HIPBLAS_OP_C;
 | 
				
			||||||
 | 
					    auto err = hipblasZgemmBatched(gridblasHandle,
 | 
				
			||||||
 | 
									   hOpA,
 | 
				
			||||||
 | 
									   hOpB,
 | 
				
			||||||
 | 
									   m,n,k,
 | 
				
			||||||
 | 
									   (hipblasDoubleComplex *) &alpha_p[0],
 | 
				
			||||||
 | 
									   (hipblasDoubleComplex **)&Amk[0], lda,
 | 
				
			||||||
 | 
									   (hipblasDoubleComplex **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
									   (hipblasDoubleComplex *) &beta_p[0],
 | 
				
			||||||
 | 
									   (hipblasDoubleComplex **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
									   batchCount);
 | 
				
			||||||
 | 
					    //	 std::cout << " hipblas return code " <<(int)err<<std::endl;
 | 
				
			||||||
 | 
					    assert(err==HIPBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					    cublasOperation_t hOpA;
 | 
				
			||||||
 | 
					    cublasOperation_t hOpB;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_N ) hOpA = CUBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_T ) hOpA = CUBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_C ) hOpA = CUBLAS_OP_C;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_N ) hOpB = CUBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_T ) hOpB = CUBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_C ) hOpB = CUBLAS_OP_C;
 | 
				
			||||||
 | 
					    auto err = cublasZgemmBatched(gridblasHandle,
 | 
				
			||||||
 | 
									  hOpA,
 | 
				
			||||||
 | 
									  hOpB,
 | 
				
			||||||
 | 
									  m,n,k,
 | 
				
			||||||
 | 
									  (cuDoubleComplex *) &alpha_p[0],
 | 
				
			||||||
 | 
									  (cuDoubleComplex **)&Amk[0], lda,
 | 
				
			||||||
 | 
									  (cuDoubleComplex **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
									  (cuDoubleComplex *) &beta_p[0],
 | 
				
			||||||
 | 
									  (cuDoubleComplex **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
									  batchCount);
 | 
				
			||||||
 | 
					    assert(err==CUBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					    //MKL’s cblas_<T>gemm_batch & OneAPI
 | 
				
			||||||
 | 
					#warning "oneMKL implementation not built "
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
 | 
				
			||||||
 | 
					    // Need a default/reference implementation
 | 
				
			||||||
 | 
					    int sda = lda*k;
 | 
				
			||||||
 | 
					    int sdb = ldb*k;
 | 
				
			||||||
 | 
					    int sdc = ldc*n;
 | 
				
			||||||
 | 
					    for (int p = 0; p < batchCount; ++p) {
 | 
				
			||||||
 | 
					      for (int mm = 0; mm < m; ++mm) {
 | 
				
			||||||
 | 
						for (int nn = 0; nn < n; ++nn) {
 | 
				
			||||||
 | 
						  ComplexD c_mn(0.0);
 | 
				
			||||||
 | 
						  for (int kk = 0; kk < k; ++kk)
 | 
				
			||||||
 | 
						    c_mn += Amk[p][mm + kk*lda ] * Bkn[p][kk + nn*ldb];
 | 
				
			||||||
 | 
						  Cmn[p][mm + nn*ldc] =  (alpha)*c_mn + (beta)*Cmn[p][mm + nn*ldc ];
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    //    synchronise();
 | 
				
			||||||
 | 
					     RealD t1=usecond();
 | 
				
			||||||
 | 
					     RealD flops = 8.0*m*n*k*batchCount;
 | 
				
			||||||
 | 
					     RealD bytes = 1.0*sizeof(ComplexD)*(m*k+k*n+m*n)*batchCount;
 | 
				
			||||||
 | 
					     //     std::cout <<GridLogMessage<< " batched Blas copy "<<(t0-t2)/1.e3 <<" ms "<<std::endl;
 | 
				
			||||||
 | 
					     //     std::cout <<GridLogMessage<< " batched Blas zGemm call "<<m<<","<<n<<","<<k<<" "<< flops/(t1-t0)/1.e3 <<" GF/s "<<(t1-t0)/1.e3<<" ms "<<std::endl;
 | 
				
			||||||
 | 
					     //     std::cout <<GridLogMessage<< " batched Blas zGemm call "<<m<<","<<n<<","<<k<<" "<< bytes/(t1-t0)/1.e3 <<" GB/s "<<(t1-t0)/1.e3<<" ms "<<std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void gemmBatched(GridBLASOperation_t OpA,
 | 
				
			||||||
 | 
							   GridBLASOperation_t OpB,
 | 
				
			||||||
 | 
							   int m,int n, int k,
 | 
				
			||||||
 | 
							   ComplexF alpha,
 | 
				
			||||||
 | 
							   deviceVector<ComplexF*> &Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
							   deviceVector<ComplexF*> &Bkn,
 | 
				
			||||||
 | 
							   ComplexF beta,
 | 
				
			||||||
 | 
							   deviceVector<ComplexF*> &Cmn)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    RealD t2=usecond();
 | 
				
			||||||
 | 
					    int32_t batchCount = Amk.size();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int lda = m; // m x k column major
 | 
				
			||||||
 | 
					    int ldb = k; // k x n column major
 | 
				
			||||||
 | 
					    int ldc = m; // m x b column major
 | 
				
			||||||
 | 
					    if(OpA!=GridBLAS_OP_N)
 | 
				
			||||||
 | 
					      lda = k;
 | 
				
			||||||
 | 
					    if(OpB!=GridBLAS_OP_N)
 | 
				
			||||||
 | 
					      ldb = n;
 | 
				
			||||||
 | 
					    static deviceVector<ComplexF> alpha_p(1);
 | 
				
			||||||
 | 
					    static deviceVector<ComplexF> beta_p(1);
 | 
				
			||||||
 | 
					    // can prestore the 1 and the zero on device
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&alpha,(void *)&alpha_p[0],sizeof(ComplexF));
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&beta ,(void *)&beta_p[0],sizeof(ComplexF));
 | 
				
			||||||
 | 
					    RealD t0=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(Bkn.size()==batchCount);
 | 
				
			||||||
 | 
					    assert(Cmn.size()==batchCount);
 | 
				
			||||||
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
 | 
					    hipblasOperation_t hOpA;
 | 
				
			||||||
 | 
					    hipblasOperation_t hOpB;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_N ) hOpA = HIPBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_T ) hOpA = HIPBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_C ) hOpA = HIPBLAS_OP_C;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_N ) hOpB = HIPBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_T ) hOpB = HIPBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_C ) hOpB = HIPBLAS_OP_C;
 | 
				
			||||||
 | 
					    auto err = hipblasCgemmBatched(gridblasHandle,
 | 
				
			||||||
 | 
									   hOpA,
 | 
				
			||||||
 | 
									   hOpB,
 | 
				
			||||||
 | 
									   m,n,k,
 | 
				
			||||||
 | 
									   (hipblasComplex *) &alpha_p[0],
 | 
				
			||||||
 | 
									   (hipblasComplex **)&Amk[0], lda,
 | 
				
			||||||
 | 
									   (hipblasComplex **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
									   (hipblasComplex *) &beta_p[0],
 | 
				
			||||||
 | 
									   (hipblasComplex **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
									   batchCount);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(err==HIPBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					    cublasOperation_t hOpA;
 | 
				
			||||||
 | 
					    cublasOperation_t hOpB;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_N ) hOpA = CUBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_T ) hOpA = CUBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_C ) hOpA = CUBLAS_OP_C;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_N ) hOpB = CUBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_T ) hOpB = CUBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_C ) hOpB = CUBLAS_OP_C;
 | 
				
			||||||
 | 
					    auto err = cublasCgemmBatched(gridblasHandle,
 | 
				
			||||||
 | 
									  hOpA,
 | 
				
			||||||
 | 
									  hOpB,
 | 
				
			||||||
 | 
									  m,n,k,
 | 
				
			||||||
 | 
									  (cuComplex *) &alpha_p[0],
 | 
				
			||||||
 | 
									  (cuComplex **)&Amk[0], lda,
 | 
				
			||||||
 | 
									  (cuComplex **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
									  (cuComplex *) &beta_p[0],
 | 
				
			||||||
 | 
									  (cuComplex **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
									  batchCount);
 | 
				
			||||||
 | 
					    assert(err==CUBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					    //MKL’s cblas_<T>gemm_batch & OneAPI
 | 
				
			||||||
 | 
					#warning "oneMKL implementation not built "
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
 | 
				
			||||||
 | 
					    int sda = lda*k;
 | 
				
			||||||
 | 
					    int sdb = ldb*k;
 | 
				
			||||||
 | 
					    int sdc = ldc*n;
 | 
				
			||||||
 | 
					    ComplexF alphaf(real(alpha),imag(alpha));
 | 
				
			||||||
 | 
					    ComplexF betaf(real(beta),imag(beta));
 | 
				
			||||||
 | 
					    // Need a default/reference implementation
 | 
				
			||||||
 | 
					    for (int p = 0; p < batchCount; ++p) {
 | 
				
			||||||
 | 
					      for (int mm = 0; mm < m; ++mm) {
 | 
				
			||||||
 | 
						for (int nn = 0; nn < n; ++nn) {
 | 
				
			||||||
 | 
						  ComplexF c_mn(0.0);
 | 
				
			||||||
 | 
						  for (int kk = 0; kk < k; ++kk)
 | 
				
			||||||
 | 
						    c_mn += Amk[p][mm + kk*lda ] * Bkn[p][kk + nn*ldb];
 | 
				
			||||||
 | 
						  Cmn[p][mm + nn*ldc] =  (alphaf)*c_mn + (betaf)*Cmn[p][mm + nn*ldc ];
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					     RealD t1=usecond();
 | 
				
			||||||
 | 
					     RealD flops = 8.0*m*n*k*batchCount;
 | 
				
			||||||
 | 
					     RealD bytes = 1.0*sizeof(ComplexF)*(m*k+k*n+m*n)*batchCount;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Single precision real GEMM
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void gemmBatched(GridBLASOperation_t OpA,
 | 
				
			||||||
 | 
							   GridBLASOperation_t OpB,
 | 
				
			||||||
 | 
							   int m,int n, int k,
 | 
				
			||||||
 | 
							   RealF alpha,
 | 
				
			||||||
 | 
							   deviceVector<RealF*> &Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
							   deviceVector<RealF*> &Bkn,
 | 
				
			||||||
 | 
							   RealF beta,
 | 
				
			||||||
 | 
							   deviceVector<RealF*> &Cmn)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    RealD t2=usecond();
 | 
				
			||||||
 | 
					    int32_t batchCount = Amk.size();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int lda = m; // m x k column major
 | 
				
			||||||
 | 
					    int ldb = k; // k x n column major
 | 
				
			||||||
 | 
					    int ldc = m; // m x b column major
 | 
				
			||||||
 | 
					    if(OpA!=GridBLAS_OP_N)
 | 
				
			||||||
 | 
					      lda = k;
 | 
				
			||||||
 | 
					    if(OpB!=GridBLAS_OP_N)
 | 
				
			||||||
 | 
					      ldb = n;
 | 
				
			||||||
 | 
					    static deviceVector<RealF> alpha_p(1);
 | 
				
			||||||
 | 
					    static deviceVector<RealF> beta_p(1);
 | 
				
			||||||
 | 
					    // can prestore the 1 and the zero on device
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&alpha,(void *)&alpha_p[0],sizeof(RealF));
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&beta ,(void *)&beta_p[0],sizeof(RealF));
 | 
				
			||||||
 | 
					    RealD t0=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(Bkn.size()==batchCount);
 | 
				
			||||||
 | 
					    assert(Cmn.size()==batchCount);
 | 
				
			||||||
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
 | 
					    hipblasOperation_t hOpA;
 | 
				
			||||||
 | 
					    hipblasOperation_t hOpB;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_N ) hOpA = HIPBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_T ) hOpA = HIPBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_C ) hOpA = HIPBLAS_OP_C;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_N ) hOpB = HIPBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_T ) hOpB = HIPBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_C ) hOpB = HIPBLAS_OP_C;
 | 
				
			||||||
 | 
					    auto err = hipblasSgemmBatched(gridblasHandle,
 | 
				
			||||||
 | 
									   hOpA,
 | 
				
			||||||
 | 
									   hOpB,
 | 
				
			||||||
 | 
									   m,n,k,
 | 
				
			||||||
 | 
									   (float *) &alpha_p[0],
 | 
				
			||||||
 | 
									   (float **)&Amk[0], lda,
 | 
				
			||||||
 | 
									   (float **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
									   (float *) &beta_p[0],
 | 
				
			||||||
 | 
									   (float **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
									   batchCount);
 | 
				
			||||||
 | 
					    assert(err==HIPBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					    cublasOperation_t hOpA;
 | 
				
			||||||
 | 
					    cublasOperation_t hOpB;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_N ) hOpA = CUBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_T ) hOpA = CUBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_C ) hOpA = CUBLAS_OP_C;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_N ) hOpB = CUBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_T ) hOpB = CUBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_C ) hOpB = CUBLAS_OP_C;
 | 
				
			||||||
 | 
					    auto err = cublasSgemmBatched(gridblasHandle,
 | 
				
			||||||
 | 
									  hOpA,
 | 
				
			||||||
 | 
									  hOpB,
 | 
				
			||||||
 | 
									  m,n,k,
 | 
				
			||||||
 | 
									  (float *) &alpha_p[0],
 | 
				
			||||||
 | 
									  (float **)&Amk[0], lda,
 | 
				
			||||||
 | 
									  (float **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
									  (float *) &beta_p[0],
 | 
				
			||||||
 | 
									  (float **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
									  batchCount);
 | 
				
			||||||
 | 
					    assert(err==CUBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					    //MKL’s cblas_<T>gemm_batch & OneAPI
 | 
				
			||||||
 | 
					#warning "oneMKL implementation not built "
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
 | 
				
			||||||
 | 
					    int sda = lda*k;
 | 
				
			||||||
 | 
					    int sdb = ldb*k;
 | 
				
			||||||
 | 
					    int sdc = ldc*n;
 | 
				
			||||||
 | 
					    // Need a default/reference implementation
 | 
				
			||||||
 | 
					    for (int p = 0; p < batchCount; ++p) {
 | 
				
			||||||
 | 
					      for (int mm = 0; mm < m; ++mm) {
 | 
				
			||||||
 | 
						for (int nn = 0; nn < n; ++nn) {
 | 
				
			||||||
 | 
						  RealD c_mn(0.0);
 | 
				
			||||||
 | 
						  for (int kk = 0; kk < k; ++kk)
 | 
				
			||||||
 | 
						    c_mn += Amk[p][mm + kk*lda ] * Bkn[p][kk + nn*ldb];
 | 
				
			||||||
 | 
						  Cmn[p][mm + nn*ldc] =  (alpha)*c_mn + (beta)*Cmn[p][mm + nn*ldc ];
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					     RealD t1=usecond();
 | 
				
			||||||
 | 
					     RealD flops = 2.0*m*n*k*batchCount;
 | 
				
			||||||
 | 
					     RealD bytes = 1.0*sizeof(RealF)*(m*k+k*n+m*n)*batchCount;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Double precision real GEMM
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void gemmBatched(GridBLASOperation_t OpA,
 | 
				
			||||||
 | 
							   GridBLASOperation_t OpB,
 | 
				
			||||||
 | 
							   int m,int n, int k,
 | 
				
			||||||
 | 
							   RealD alpha,
 | 
				
			||||||
 | 
							   deviceVector<RealD*> &Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
							   deviceVector<RealD*> &Bkn,
 | 
				
			||||||
 | 
							   RealD beta,
 | 
				
			||||||
 | 
							   deviceVector<RealD*> &Cmn)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    RealD t2=usecond();
 | 
				
			||||||
 | 
					    int32_t batchCount = Amk.size();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int lda = m; // m x k column major
 | 
				
			||||||
 | 
					    int ldb = k; // k x n column major
 | 
				
			||||||
 | 
					    int ldc = m; // m x b column major
 | 
				
			||||||
 | 
					    if(OpA!=GridBLAS_OP_N)
 | 
				
			||||||
 | 
					      lda = k;
 | 
				
			||||||
 | 
					    if(OpB!=GridBLAS_OP_N)
 | 
				
			||||||
 | 
					      ldb = n;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    static deviceVector<RealD> alpha_p(1);
 | 
				
			||||||
 | 
					    static deviceVector<RealD> beta_p(1);
 | 
				
			||||||
 | 
					    // can prestore the 1 and the zero on device
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&alpha,(void *)&alpha_p[0],sizeof(RealD));
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&beta ,(void *)&beta_p[0],sizeof(RealD));
 | 
				
			||||||
 | 
					    RealD t0=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(Bkn.size()==batchCount);
 | 
				
			||||||
 | 
					    assert(Cmn.size()==batchCount);
 | 
				
			||||||
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
 | 
					    hipblasOperation_t hOpA;
 | 
				
			||||||
 | 
					    hipblasOperation_t hOpB;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_N ) hOpA = HIPBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_T ) hOpA = HIPBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_C ) hOpA = HIPBLAS_OP_C;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_N ) hOpB = HIPBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_T ) hOpB = HIPBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_C ) hOpB = HIPBLAS_OP_C;
 | 
				
			||||||
 | 
					    auto err = hipblasDgemmBatched(gridblasHandle,
 | 
				
			||||||
 | 
									   HIPBLAS_OP_N,
 | 
				
			||||||
 | 
									   HIPBLAS_OP_N,
 | 
				
			||||||
 | 
									   m,n,k,
 | 
				
			||||||
 | 
									   (double *) &alpha_p[0],
 | 
				
			||||||
 | 
									   (double **)&Amk[0], lda,
 | 
				
			||||||
 | 
									   (double **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
									   (double *) &beta_p[0],
 | 
				
			||||||
 | 
									   (double **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
									   batchCount);
 | 
				
			||||||
 | 
					    assert(err==HIPBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					    cublasOperation_t hOpA;
 | 
				
			||||||
 | 
					    cublasOperation_t hOpB;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_N ) hOpA = CUBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_T ) hOpA = CUBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpA == GridBLAS_OP_C ) hOpA = CUBLAS_OP_C;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_N ) hOpB = CUBLAS_OP_N;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_T ) hOpB = CUBLAS_OP_T;
 | 
				
			||||||
 | 
					    if ( OpB == GridBLAS_OP_C ) hOpB = CUBLAS_OP_C;
 | 
				
			||||||
 | 
					    auto err = cublasDgemmBatched(gridblasHandle,
 | 
				
			||||||
 | 
									  hOpA,
 | 
				
			||||||
 | 
									  hOpB,
 | 
				
			||||||
 | 
									  m,n,k,
 | 
				
			||||||
 | 
									  (double *) &alpha_p[0],
 | 
				
			||||||
 | 
									  (double **)&Amk[0], lda,
 | 
				
			||||||
 | 
									  (double **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
									  (double *) &beta_p[0],
 | 
				
			||||||
 | 
									  (double **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
									  batchCount);
 | 
				
			||||||
 | 
					    assert(err==CUBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					      int64_t m64=m;
 | 
				
			||||||
 | 
					      int64_t n64=n;
 | 
				
			||||||
 | 
					      int64_t k64=k;
 | 
				
			||||||
 | 
					      int64_t batchCount64=batchCount;
 | 
				
			||||||
 | 
					      oneapi::mkl::blas::column_major::gemm_batch(*theGridAccelerator,
 | 
				
			||||||
 | 
					      onemkl::transpose::N,
 | 
				
			||||||
 | 
					      onemkl::transpose::N,
 | 
				
			||||||
 | 
					      &m64,&n64,&k64,
 | 
				
			||||||
 | 
					      (double *) &alpha_p[0],
 | 
				
			||||||
 | 
					      (double **)&Amk[0], lda,
 | 
				
			||||||
 | 
					      (double **)&Bkn[0], ldb,
 | 
				
			||||||
 | 
					      (double *) &beta_p[0],
 | 
				
			||||||
 | 
					      (double **)&Cmn[0], ldc,
 | 
				
			||||||
 | 
					      1,&batchCount64);
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    //MKL’s cblas_<T>gemm_batch & OneAPI
 | 
				
			||||||
 | 
					#warning "oneMKL implementation not built "
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP)
 | 
				
			||||||
 | 
					    int sda = lda*k;
 | 
				
			||||||
 | 
					    int sdb = ldb*k;
 | 
				
			||||||
 | 
					    int sdc = ldc*n;
 | 
				
			||||||
 | 
					    // Need a default/reference implementation
 | 
				
			||||||
 | 
					    for (int p = 0; p < batchCount; ++p) {
 | 
				
			||||||
 | 
					      for (int mm = 0; mm < m; ++mm) {
 | 
				
			||||||
 | 
						for (int nn = 0; nn < n; ++nn) {
 | 
				
			||||||
 | 
						  RealD c_mn(0.0);
 | 
				
			||||||
 | 
						  for (int kk = 0; kk < k; ++kk)
 | 
				
			||||||
 | 
						    c_mn += Amk[p][mm + kk*lda ] * Bkn[p][kk + nn*ldb];
 | 
				
			||||||
 | 
						  Cmn[p][mm + nn*ldc] =  (alpha)*c_mn + (beta)*Cmn[p][mm + nn*ldc ];
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					     RealD t1=usecond();
 | 
				
			||||||
 | 
					     RealD flops = 2.0*m*n*k*batchCount;
 | 
				
			||||||
 | 
					     RealD bytes = 1.0*sizeof(RealD)*(m*k+k*n+m*n)*batchCount;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Strided case used by benchmark, but generally unused in Grid
 | 
				
			||||||
 | 
					  // Keep a code example in double complex, but don't generate the single and real variants for now
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  void gemmStridedBatched(int m,int n, int k,
 | 
				
			||||||
 | 
								  ComplexD alpha,
 | 
				
			||||||
 | 
								  ComplexD* Amk,  // pointer list to matrices
 | 
				
			||||||
 | 
								  ComplexD* Bkn,
 | 
				
			||||||
 | 
								  ComplexD beta,
 | 
				
			||||||
 | 
								  ComplexD* Cmn,
 | 
				
			||||||
 | 
								  int batchCount)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    // Use C-row major storage, so transpose calls
 | 
				
			||||||
 | 
					    int lda = m; // m x k column major
 | 
				
			||||||
 | 
					    int ldb = k; // k x n column major
 | 
				
			||||||
 | 
					    int ldc = m; // m x b column major
 | 
				
			||||||
 | 
					    int sda = m*k;
 | 
				
			||||||
 | 
					    int sdb = k*n;
 | 
				
			||||||
 | 
					    int sdc = m*n;
 | 
				
			||||||
 | 
					    deviceVector<ComplexD> alpha_p(1);
 | 
				
			||||||
 | 
					    deviceVector<ComplexD> beta_p(1);
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&alpha,(void *)&alpha_p[0],sizeof(ComplexD));
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice((void *)&beta ,(void *)&beta_p[0],sizeof(ComplexD));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //    std::cout << "blasZgemmStridedBatched mnk  "<<m<<","<<n<<","<<k<<" count "<<batchCount<<std::endl;
 | 
				
			||||||
 | 
					    //    std::cout << "blasZgemmStridedBatched ld   "<<lda<<","<<ldb<<","<<ldc<<std::endl;
 | 
				
			||||||
 | 
					    //    std::cout << "blasZgemmStridedBatched sd   "<<sda<<","<<sdb<<","<<sdc<<std::endl;
 | 
				
			||||||
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
 | 
					    auto err = hipblasZgemmStridedBatched(gridblasHandle,
 | 
				
			||||||
 | 
										  HIPBLAS_OP_N,
 | 
				
			||||||
 | 
										  HIPBLAS_OP_N,
 | 
				
			||||||
 | 
										  m,n,k,
 | 
				
			||||||
 | 
										  (hipblasDoubleComplex *) &alpha_p[0],
 | 
				
			||||||
 | 
										  (hipblasDoubleComplex *) Amk, lda, sda,
 | 
				
			||||||
 | 
										  (hipblasDoubleComplex *) Bkn, ldb, sdb,
 | 
				
			||||||
 | 
										  (hipblasDoubleComplex *) &beta_p[0],
 | 
				
			||||||
 | 
										  (hipblasDoubleComplex *) Cmn, ldc, sdc,
 | 
				
			||||||
 | 
										  batchCount);
 | 
				
			||||||
 | 
					    assert(err==HIPBLAS_STATUS_SUCCESS);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					    cublasZgemmStridedBatched(gridblasHandle,
 | 
				
			||||||
 | 
								      CUBLAS_OP_N,
 | 
				
			||||||
 | 
								      CUBLAS_OP_N,
 | 
				
			||||||
 | 
								      m,n,k,
 | 
				
			||||||
 | 
								      (cuDoubleComplex *) &alpha_p[0],
 | 
				
			||||||
 | 
								      (cuDoubleComplex *) Amk, lda, sda,
 | 
				
			||||||
 | 
								      (cuDoubleComplex *) Bkn, ldb, sdb,
 | 
				
			||||||
 | 
								      (cuDoubleComplex *) &beta_p[0],
 | 
				
			||||||
 | 
								      (cuDoubleComplex *) Cmn, ldc, sdc,
 | 
				
			||||||
 | 
								      batchCount);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#if defined(GRID_SYCL) || defined(GRID_ONE_MKL)
 | 
				
			||||||
 | 
					    oneapi::mkl::blas::column_major::gemm_batch(*gridblasHandle,
 | 
				
			||||||
 | 
											oneapi::mkl::transpose::N,
 | 
				
			||||||
 | 
											oneapi::mkl::transpose::N,
 | 
				
			||||||
 | 
											m,n,k,
 | 
				
			||||||
 | 
											alpha,
 | 
				
			||||||
 | 
											(const ComplexD *)Amk,lda,sda,
 | 
				
			||||||
 | 
											(const ComplexD *)Bkn,ldb,sdb,
 | 
				
			||||||
 | 
											beta,
 | 
				
			||||||
 | 
											(ComplexD *)Cmn,ldc,sdc,
 | 
				
			||||||
 | 
											batchCount);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#if !defined(GRID_SYCL) && !defined(GRID_CUDA) && !defined(GRID_HIP) && !defined(GRID_ONE_MKL)
 | 
				
			||||||
 | 
					     // Need a default/reference implementation
 | 
				
			||||||
 | 
					     for (int p = 0; p < batchCount; ++p) {
 | 
				
			||||||
 | 
					       for (int mm = 0; mm < m; ++mm) {
 | 
				
			||||||
 | 
						 for (int nn = 0; nn < n; ++nn) {
 | 
				
			||||||
 | 
						   ComplexD c_mn(0.0);
 | 
				
			||||||
 | 
						   for (int kk = 0; kk < k; ++kk)
 | 
				
			||||||
 | 
						     c_mn += Amk[mm + kk*lda + p*sda] * Bkn[kk + nn*ldb + p*sdb];
 | 
				
			||||||
 | 
						   Cmn[mm + nn*ldc + p*sdc] =  (alpha)*c_mn + (beta)*Cmn[mm + nn*ldc + p*sdc];
 | 
				
			||||||
 | 
						 }
 | 
				
			||||||
 | 
					       }
 | 
				
			||||||
 | 
					     }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  double benchmark(int M, int N, int K, int BATCH)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int32_t N_A = M*K*BATCH;
 | 
				
			||||||
 | 
					    int32_t N_B = K*N*BATCH;
 | 
				
			||||||
 | 
					    int32_t N_C = M*N*BATCH;
 | 
				
			||||||
 | 
					    deviceVector<ComplexD> A(N_A); acceleratorMemSet(&A[0],0,N_A*sizeof(ComplexD));
 | 
				
			||||||
 | 
					    deviceVector<ComplexD> B(N_B); acceleratorMemSet(&B[0],0,N_B*sizeof(ComplexD));
 | 
				
			||||||
 | 
					    deviceVector<ComplexD> C(N_C); acceleratorMemSet(&C[0],0,N_C*sizeof(ComplexD));
 | 
				
			||||||
 | 
					    ComplexD alpha(1.0);
 | 
				
			||||||
 | 
					    ComplexD beta (1.0);
 | 
				
			||||||
 | 
					    RealD flops = 8.0*M*N*K*BATCH;
 | 
				
			||||||
 | 
					    int ncall=10;
 | 
				
			||||||
 | 
					    RealD t0 = usecond();
 | 
				
			||||||
 | 
					    for(int i=0;i<ncall;i++){
 | 
				
			||||||
 | 
					      gemmStridedBatched(M,N,K,
 | 
				
			||||||
 | 
								 alpha,
 | 
				
			||||||
 | 
								 &A[0], // m x k 
 | 
				
			||||||
 | 
								 &B[0], // k x n
 | 
				
			||||||
 | 
								 beta, 
 | 
				
			||||||
 | 
								 &C[0], // m x n
 | 
				
			||||||
 | 
								 BATCH);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    synchronise();
 | 
				
			||||||
 | 
					    RealD t1 = usecond();
 | 
				
			||||||
 | 
					    RealD bytes = 1.0*sizeof(ComplexD)*(M*N*2+N*K+M*K)*BATCH;
 | 
				
			||||||
 | 
					    flops = 8.0*M*N*K*BATCH*ncall;
 | 
				
			||||||
 | 
					    flops = flops/(t1-t0)/1.e3;
 | 
				
			||||||
 | 
					    return flops; // Returns gigaflops
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
@@ -176,6 +176,7 @@ template<class T> using cshiftAllocator = std::allocator<T>;
 | 
				
			|||||||
template<class T> using Vector        = std::vector<T,uvmAllocator<T> >;           
 | 
					template<class T> using Vector        = std::vector<T,uvmAllocator<T> >;           
 | 
				
			||||||
template<class T> using stencilVector = std::vector<T,alignedAllocator<T> >;           
 | 
					template<class T> using stencilVector = std::vector<T,alignedAllocator<T> >;           
 | 
				
			||||||
template<class T> using commVector = std::vector<T,devAllocator<T> >;
 | 
					template<class T> using commVector = std::vector<T,devAllocator<T> >;
 | 
				
			||||||
 | 
					template<class T> using deviceVector  = std::vector<T,devAllocator<T> >;
 | 
				
			||||||
template<class T> using cshiftVector = std::vector<T,cshiftAllocator<T> >;
 | 
					template<class T> using cshiftVector = std::vector<T,cshiftAllocator<T> >;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -222,9 +222,6 @@ void MemoryManager::InitMessage(void) {
 | 
				
			|||||||
#ifdef GRID_SYCL
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
  std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
 | 
					  std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_shared"<<std::endl;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#ifdef GRID_OMPTARGET
 | 
					 | 
				
			||||||
  std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET managed memory"<<std::endl;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
  std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
 | 
					  std::cout << GridLogMessage<< "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"<<std::endl;
 | 
				
			||||||
#ifdef GRID_CUDA
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
@@ -236,9 +233,6 @@ void MemoryManager::InitMessage(void) {
 | 
				
			|||||||
#ifdef GRID_SYCL
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
  std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
 | 
					  std::cout << GridLogMessage<< "MemoryManager::Init() Using SYCL malloc_device"<<std::endl;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#ifdef GRID_OMPTARGET
 | 
					 | 
				
			||||||
  std::cout << GridLogMessage<< "MemoryManager::Init() Using OMPTARGET omp_alloc_device"<<std::endl;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -348,6 +348,7 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
 | 
				
			|||||||
  return offbytes;
 | 
					  return offbytes;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef NVLINK_GET // Define to use get instead of put DMA
 | 
				
			||||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
					double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
				
			||||||
							 void *xmit,
 | 
												 void *xmit,
 | 
				
			||||||
							 int dest,int dox,
 | 
												 int dest,int dox,
 | 
				
			||||||
@@ -380,9 +381,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 | 
				
			|||||||
      list.push_back(rrq);
 | 
					      list.push_back(rrq);
 | 
				
			||||||
      off_node_bytes+=rbytes;
 | 
					      off_node_bytes+=rbytes;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					#ifdef NVLINK_GET
 | 
				
			||||||
 | 
					      void *shm = (void *) this->ShmBufferTranslate(from,xmit);
 | 
				
			||||||
 | 
					      assert(shm!=NULL);
 | 
				
			||||||
 | 
					      acceleratorCopyDeviceToDeviceAsynch(shm,recv,rbytes);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  if (dox) {
 | 
					  if (dox) {
 | 
				
			||||||
 | 
					    //  rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
 | 
				
			||||||
    if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
 | 
					    if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
 | 
				
			||||||
      tag= dir+_processor*32;
 | 
					      tag= dir+_processor*32;
 | 
				
			||||||
      ierr =MPI_Isend(xmit, xbytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
 | 
					      ierr =MPI_Isend(xmit, xbytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
 | 
				
			||||||
@@ -390,9 +397,12 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 | 
				
			|||||||
      list.push_back(xrq);
 | 
					      list.push_back(xrq);
 | 
				
			||||||
      off_node_bytes+=xbytes;
 | 
					      off_node_bytes+=xbytes;
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					#ifndef NVLINK_GET
 | 
				
			||||||
      void *shm = (void *) this->ShmBufferTranslate(dest,recv);
 | 
					      void *shm = (void *) this->ShmBufferTranslate(dest,recv);
 | 
				
			||||||
      assert(shm!=NULL);
 | 
					      assert(shm!=NULL);
 | 
				
			||||||
      acceleratorCopyDeviceToDeviceAsynch(xmit,shm,xbytes);
 | 
					      acceleratorCopyDeviceToDeviceAsynch(xmit,shm,xbytes);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -402,6 +412,8 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
  int nreq=list.size();
 | 
					  int nreq=list.size();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  acceleratorCopySynchronise();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (nreq==0) return;
 | 
					  if (nreq==0) return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::vector<MPI_Status> status(nreq);
 | 
					  std::vector<MPI_Status> status(nreq);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -40,6 +40,9 @@ int                 GlobalSharedMemory::_ShmAlloc;
 | 
				
			|||||||
uint64_t            GlobalSharedMemory::_ShmAllocBytes;
 | 
					uint64_t            GlobalSharedMemory::_ShmAllocBytes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
 | 
					std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
 | 
				
			||||||
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					void * GlobalSharedMemory::HostCommBuf;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Grid_MPI_Comm       GlobalSharedMemory::WorldShmComm;
 | 
					Grid_MPI_Comm       GlobalSharedMemory::WorldShmComm;
 | 
				
			||||||
int                 GlobalSharedMemory::WorldShmRank;
 | 
					int                 GlobalSharedMemory::WorldShmRank;
 | 
				
			||||||
@@ -66,6 +69,26 @@ void GlobalSharedMemory::SharedMemoryFree(void)
 | 
				
			|||||||
/////////////////////////////////
 | 
					/////////////////////////////////
 | 
				
			||||||
// Alloc, free shmem region
 | 
					// Alloc, free shmem region
 | 
				
			||||||
/////////////////////////////////
 | 
					/////////////////////////////////
 | 
				
			||||||
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					void *SharedMemory::HostBufferMalloc(size_t bytes){
 | 
				
			||||||
 | 
					  void *ptr = (void *)host_heap_top;
 | 
				
			||||||
 | 
					  host_heap_top  += bytes;
 | 
				
			||||||
 | 
					  host_heap_bytes+= bytes;
 | 
				
			||||||
 | 
					  if (host_heap_bytes >= host_heap_size) {
 | 
				
			||||||
 | 
					    std::cout<< " HostBufferMalloc exceeded heap size -- try increasing with --shm <MB> flag" <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<< " Current alloc is " << (bytes/(1024*1024)) <<"MB"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<< " Current bytes is " << (host_heap_bytes/(1024*1024)) <<"MB"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<< " Current heap  is " << (host_heap_size/(1024*1024)) <<"MB"<<std::endl;
 | 
				
			||||||
 | 
					    assert(host_heap_bytes<host_heap_size);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return ptr;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					// Release every host buffer allocation at once: zero the running byte count
					// and rewind the heap cursor to the address held in HostCommBuf.
					void SharedMemory::HostBufferFreeAll(void) {
					  host_heap_bytes = 0;
					  host_heap_top   = (size_t)HostCommBuf;
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
void *SharedMemory::ShmBufferMalloc(size_t bytes){
 | 
					void *SharedMemory::ShmBufferMalloc(size_t bytes){
 | 
				
			||||||
  //  bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
 | 
					  //  bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
 | 
				
			||||||
  void *ptr = (void *)heap_top;
 | 
					  void *ptr = (void *)heap_top;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -75,7 +75,9 @@ public:
 | 
				
			|||||||
  static int           Hugepages;
 | 
					  static int           Hugepages;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  static std::vector<void *> WorldShmCommBufs;
 | 
					  static std::vector<void *> WorldShmCommBufs;
 | 
				
			||||||
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					  static void *HostCommBuf;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
  static Grid_MPI_Comm WorldComm;
 | 
					  static Grid_MPI_Comm WorldComm;
 | 
				
			||||||
  static int           WorldRank;
 | 
					  static int           WorldRank;
 | 
				
			||||||
  static int           WorldSize;
 | 
					  static int           WorldSize;
 | 
				
			||||||
@@ -120,6 +122,13 @@ private:
 | 
				
			|||||||
  size_t heap_bytes;
 | 
					  size_t heap_bytes;
 | 
				
			||||||
  size_t heap_size;
 | 
					  size_t heap_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					  size_t host_heap_top;  // set in free all
 | 
				
			||||||
 | 
					  size_t host_heap_bytes;// set in free all
 | 
				
			||||||
 | 
					  void *HostCommBuf;     // set in SetCommunicator
 | 
				
			||||||
 | 
					  size_t host_heap_size; // set in SetCommunicator
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Grid_MPI_Comm    ShmComm; // for barriers
 | 
					  Grid_MPI_Comm    ShmComm; // for barriers
 | 
				
			||||||
@@ -151,7 +160,10 @@ public:
 | 
				
			|||||||
  void *ShmBufferTranslate(int rank,void * local_p);
 | 
					  void *ShmBufferTranslate(int rank,void * local_p);
 | 
				
			||||||
  void *ShmBufferMalloc(size_t bytes);
 | 
					  void *ShmBufferMalloc(size_t bytes);
 | 
				
			||||||
  void  ShmBufferFreeAll(void) ;
 | 
					  void  ShmBufferFreeAll(void) ;
 | 
				
			||||||
  
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					  void *HostBufferMalloc(size_t bytes);
 | 
				
			||||||
 | 
					  void HostBufferFreeAll(void);
 | 
				
			||||||
 | 
					#endif  
 | 
				
			||||||
  //////////////////////////////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Make info on Nodes & ranks and Shared memory available
 | 
					  // Make info on Nodes & ranks and Shared memory available
 | 
				
			||||||
  //////////////////////////////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -39,10 +39,12 @@ Author: Christoph Lehner <christoph@lhnr.de>
 | 
				
			|||||||
#include <hip/hip_runtime_api.h>
 | 
					#include <hip/hip_runtime_api.h>
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#ifdef GRID_SYCL
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					#ifdef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
#define GRID_SYCL_LEVEL_ZERO_IPC
 | 
					#define GRID_SYCL_LEVEL_ZERO_IPC
 | 
				
			||||||
#include <syscall.h>
 | 
					 | 
				
			||||||
#define SHM_SOCKETS
 | 
					#define SHM_SOCKETS
 | 
				
			||||||
#endif 
 | 
					#endif 
 | 
				
			||||||
 | 
					#include <syscall.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <sys/socket.h>
 | 
					#include <sys/socket.h>
 | 
				
			||||||
#include <sys/un.h>
 | 
					#include <sys/un.h>
 | 
				
			||||||
@@ -512,46 +514,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
// Hugetlbfs mapping intended
 | 
					// Hugetlbfs mapping intended
 | 
				
			||||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
#if defined(GRID_CUDA) ||defined(GRID_HIP)  || defined(GRID_SYCL)
 | 
					#if defined(GRID_CUDA) ||defined(GRID_HIP)  || defined(GRID_SYCL)
 | 
				
			||||||
 | 
					 | 
				
			||||||
//if defined(GRID_SYCL)
 | 
					 | 
				
			||||||
#if 0
 | 
					 | 
				
			||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  void * ShmCommBuf ; 
 | 
					 | 
				
			||||||
  assert(_ShmSetup==1);
 | 
					 | 
				
			||||||
  assert(_ShmAlloc==0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  // allocate the pointer array for shared windows for our group
 | 
					 | 
				
			||||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  MPI_Barrier(WorldShmComm);
 | 
					 | 
				
			||||||
  WorldShmCommBufs.resize(WorldShmSize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  // Each MPI rank should allocate our own buffer
 | 
					 | 
				
			||||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  ShmCommBuf = acceleratorAllocDevice(bytes);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  if (ShmCommBuf == (void *)NULL ) {
 | 
					 | 
				
			||||||
    std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
 | 
					 | 
				
			||||||
    exit(EXIT_FAILURE);  
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes 
 | 
					 | 
				
			||||||
	    << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  SharedMemoryZero(ShmCommBuf,bytes);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  assert(WorldShmSize == 1);
 | 
					 | 
				
			||||||
  for(int r=0;r<WorldShmSize;r++){
 | 
					 | 
				
			||||||
    WorldShmCommBufs[r] = ShmCommBuf;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  _ShmAllocBytes=bytes;
 | 
					 | 
				
			||||||
  _ShmAlloc=1;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#if defined(GRID_CUDA) ||defined(GRID_HIP) ||defined(GRID_SYCL)  
 | 
					 | 
				
			||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
					void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  void * ShmCommBuf ; 
 | 
					  void * ShmCommBuf ; 
 | 
				
			||||||
@@ -574,6 +536,9 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Each MPI rank should allocate our own buffer
 | 
					  // Each MPI rank should allocate our own buffer
 | 
				
			||||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					  HostCommBuf= malloc(bytes);
 | 
				
			||||||
 | 
					#endif  
 | 
				
			||||||
  ShmCommBuf = acceleratorAllocDevice(bytes);
 | 
					  ShmCommBuf = acceleratorAllocDevice(bytes);
 | 
				
			||||||
  if (ShmCommBuf == (void *)NULL ) {
 | 
					  if (ShmCommBuf == (void *)NULL ) {
 | 
				
			||||||
    std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
 | 
					    std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
 | 
				
			||||||
@@ -738,7 +703,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
  _ShmAllocBytes=bytes;
 | 
					  _ShmAllocBytes=bytes;
 | 
				
			||||||
  _ShmAlloc=1;
 | 
					  _ShmAlloc=1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else 
 | 
					#else 
 | 
				
			||||||
#ifdef GRID_MPI3_SHMMMAP
 | 
					#ifdef GRID_MPI3_SHMMMAP
 | 
				
			||||||
@@ -962,6 +926,12 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
  ShmBufferFreeAll();
 | 
					  ShmBufferFreeAll();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					  host_heap_size = heap_size;
 | 
				
			||||||
 | 
					  HostCommBuf= GlobalSharedMemory::HostCommBuf;
 | 
				
			||||||
 | 
					  HostBufferFreeAll();
 | 
				
			||||||
 | 
					#endif  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /////////////////////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // find comm ranks in our SHM group (i.e. which ranks are on our node)
 | 
					  // find comm ranks in our SHM group (i.e. which ranks are on our node)
 | 
				
			||||||
  /////////////////////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -68,8 +68,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Each MPI rank should allocate our own buffer
 | 
					  // Each MPI rank should allocate our own buffer
 | 
				
			||||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  ShmCommBuf = acceleratorAllocShared(bytes);
 | 
					  ShmCommBuf = acceleratorAllocDevice(bytes);
 | 
				
			||||||
  //ShmCommBuf = acceleratorAllocDevice(bytes);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (ShmCommBuf == (void *)NULL ) {
 | 
					  if (ShmCommBuf == (void *)NULL ) {
 | 
				
			||||||
    std::cerr << " SharedMemoryNone.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
 | 
					    std::cerr << " SharedMemoryNone.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,6 +35,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <Grid/lattice/Lattice_transpose.h>
 | 
					#include <Grid/lattice/Lattice_transpose.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_local.h>
 | 
					#include <Grid/lattice/Lattice_local.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_reduction.h>
 | 
					#include <Grid/lattice/Lattice_reduction.h>
 | 
				
			||||||
 | 
					#include <Grid/lattice/Lattice_crc.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_peekpoke.h>
 | 
					#include <Grid/lattice/Lattice_peekpoke.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_reality.h>
 | 
					#include <Grid/lattice/Lattice_reality.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_real_imag.h>
 | 
					#include <Grid/lattice/Lattice_real_imag.h>
 | 
				
			||||||
@@ -46,5 +47,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <Grid/lattice/Lattice_unary.h>
 | 
					#include <Grid/lattice/Lattice_unary.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_transfer.h>
 | 
					#include <Grid/lattice/Lattice_transfer.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_basis.h>
 | 
					#include <Grid/lattice/Lattice_basis.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_crc.h>
 | 
					 | 
				
			||||||
#include <Grid/lattice/PaddedCell.h>
 | 
					#include <Grid/lattice/PaddedCell.h>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -42,13 +42,13 @@ template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class vobj> uint32_t crc(Lattice<vobj> & buf)
 | 
					template<class vobj> uint32_t crc(const Lattice<vobj> & buf)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  autoView( buf_v , buf, CpuRead);
 | 
					  autoView( buf_v , buf, CpuRead);
 | 
				
			||||||
  return ::crc32(0L,(unsigned char *)&buf_v[0],(size_t)sizeof(vobj)*buf.oSites());
 | 
					  return ::crc32(0L,(unsigned char *)&buf_v[0],(size_t)sizeof(vobj)*buf.oSites());
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl;
 | 
					#define CRC(U) std::cerr << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -31,6 +31,7 @@ Author: Christoph Lehner <christoph@lhnr.de>
 | 
				
			|||||||
#if defined(GRID_SYCL)
 | 
					#if defined(GRID_SYCL)
 | 
				
			||||||
#include <Grid/lattice/Lattice_reduction_sycl.h>
 | 
					#include <Grid/lattice/Lattice_reduction_sycl.h>
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					#include <Grid/lattice/Lattice_slicesum_core.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -251,10 +252,10 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
 | 
				
			|||||||
    autoView( right_v,right, AcceleratorRead);
 | 
					    autoView( right_v,right, AcceleratorRead);
 | 
				
			||||||
    // This code could read coalesce
 | 
					    // This code could read coalesce
 | 
				
			||||||
    // GPU - SIMT lane compliance...
 | 
					    // GPU - SIMT lane compliance...
 | 
				
			||||||
    accelerator_for( ss, sites, 1,{
 | 
					    accelerator_for( ss, sites, nsimd,{
 | 
				
			||||||
        auto x_l = left_v[ss];
 | 
						auto x_l = left_v(ss);
 | 
				
			||||||
        auto y_l = right_v[ss];
 | 
						auto y_l = right_v(ss);
 | 
				
			||||||
        inner_tmp_v[ss]=innerProductD(x_l,y_l);
 | 
						coalescedWrite(inner_tmp_v[ss],innerProductD(x_l,y_l));
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
@@ -267,18 +268,11 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
 | 
				
			|||||||
    autoView( right_v,right, AcceleratorRead);
 | 
					    autoView( right_v,right, AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // GPU - SIMT lane compliance...
 | 
					    // GPU - SIMT lane compliance...
 | 
				
			||||||
    //accelerator_for( ss, sites, nsimd,{
 | 
					    accelerator_for( ss, sites, nsimd,{
 | 
				
			||||||
    //    auto x_l = left_v(ss);
 | 
						auto x_l = left_v(ss);
 | 
				
			||||||
    //    auto y_l = right_v(ss);
 | 
						auto y_l = right_v(ss);
 | 
				
			||||||
    //    coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
 | 
					 | 
				
			||||||
    //});
 | 
					 | 
				
			||||||
    #pragma omp target map ( to:left_v, right_v ) map ( tofrom:inner_tmp_v )
 | 
					 | 
				
			||||||
    #pragma omp teams distribute parallel for thread_limit(THREAD_LIMIT) //nowait
 | 
					 | 
				
			||||||
    for ( uint64_t ss=0;ss<sites;ss++) { 
 | 
					 | 
				
			||||||
        auto x_l = left_v[ss];
 | 
					 | 
				
			||||||
        auto y_l = right_v[ss];
 | 
					 | 
				
			||||||
	coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
 | 
						coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
 | 
				
			||||||
    }
 | 
					    });
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
  // This is in single precision and fails some tests
 | 
					  // This is in single precision and fails some tests
 | 
				
			||||||
@@ -287,11 +281,29 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
 | 
				
			|||||||
  return nrm;
 | 
					  return nrm;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class vobj>
 | 
					template<class vobj>
 | 
				
			||||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) {
 | 
					inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) {
 | 
				
			||||||
  GridBase *grid = left.Grid();
 | 
					  GridBase *grid = left.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					  uint64_t csum=0;
 | 
				
			||||||
 | 
					  if ( FlightRecorder::LoggingMode != FlightRecorder::LoggingModeNone)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    // Hack
 | 
				
			||||||
 | 
					    // Fast integer xor checksum. Can also be used in comms now.
 | 
				
			||||||
 | 
					    autoView(l_v,left,AcceleratorRead);
 | 
				
			||||||
 | 
					    Integer words = left.Grid()->oSites()*sizeof(vobj)/sizeof(uint64_t);
 | 
				
			||||||
 | 
					    uint64_t *base= (uint64_t *)&l_v[0];
 | 
				
			||||||
 | 
					    csum=svm_xor(base,words);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  FlightRecorder::CsumLog(csum);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
  ComplexD nrm = rankInnerProduct(left,right);
 | 
					  ComplexD nrm = rankInnerProduct(left,right);
 | 
				
			||||||
 | 
					  RealD local = real(nrm);
 | 
				
			||||||
 | 
					  FlightRecorder::NormLog(real(nrm)); 
 | 
				
			||||||
  grid->GlobalSum(nrm);
 | 
					  grid->GlobalSum(nrm);
 | 
				
			||||||
 | 
					  FlightRecorder::ReductionLog(local,real(nrm)); 
 | 
				
			||||||
  return nrm;
 | 
					  return nrm;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -455,19 +467,10 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
 | 
				
			|||||||
  int e1=    grid->_slice_nblock[orthogdim];
 | 
					  int e1=    grid->_slice_nblock[orthogdim];
 | 
				
			||||||
  int e2=    grid->_slice_block [orthogdim];
 | 
					  int e2=    grid->_slice_block [orthogdim];
 | 
				
			||||||
  int stride=grid->_slice_stride[orthogdim];
 | 
					  int stride=grid->_slice_stride[orthogdim];
 | 
				
			||||||
 | 
					  int ostride=grid->_ostride[orthogdim];
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  // sum over reduced dimension planes, breaking out orthog dir
 | 
					  //Reduce Data down to lvSum
 | 
				
			||||||
  // Parallel over orthog direction
 | 
					  sliceSumReduction(Data,lvSum,rd, e1,e2,stride,ostride,Nsimd);
 | 
				
			||||||
  autoView( Data_v, Data, CpuRead);
 | 
					 | 
				
			||||||
  thread_for( r,rd, {
 | 
					 | 
				
			||||||
    int so=r*grid->_ostride[orthogdim]; // base offset for start of plane 
 | 
					 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					 | 
				
			||||||
	int ss= so+n*stride+b;
 | 
					 | 
				
			||||||
	lvSum[r]=lvSum[r]+Data_v[ss];
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  });
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Sum across simd lanes in the plane, breaking out orthog dir.
 | 
					  // Sum across simd lanes in the plane, breaking out orthog dir.
 | 
				
			||||||
  Coordinate icoor(Nd);
 | 
					  Coordinate icoor(Nd);
 | 
				
			||||||
@@ -511,6 +514,7 @@ sliceSum(const Lattice<vobj> &Data,int orthogdim)
 | 
				
			|||||||
  return result;
 | 
					  return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class vobj>
 | 
					template<class vobj>
 | 
				
			||||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim) 
 | 
					static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim) 
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -32,9 +32,6 @@ int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &
 | 
				
			|||||||
#ifdef GRID_HIP
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
  auto r=hipGetDevice(&device);
 | 
					  auto r=hipGetDevice(&device);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#ifdef GRID_OMPTARGET
 | 
					 | 
				
			||||||
  device = omp_get_device_num();  
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  Iterator warpSize            = gpu_props[device].warpSize;
 | 
					  Iterator warpSize            = gpu_props[device].warpSize;
 | 
				
			||||||
  Iterator sharedMemPerBlock   = gpu_props[device].sharedMemPerBlock;
 | 
					  Iterator sharedMemPerBlock   = gpu_props[device].sharedMemPerBlock;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -69,29 +69,30 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite
 | 
				
			|||||||
  return result;
 | 
					  return result;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					template<class Word> Word svm_xor(Word *vec,uint64_t L)
 | 
				
			||||||
template<class Double> Double svm_reduce(Double *vec,uint64_t L)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  Double sumResult; zeroit(sumResult);
 | 
					  Word xorResult; xorResult = 0;
 | 
				
			||||||
  Double *d_sum =(Double *)cl::sycl::malloc_shared(sizeof(Double),*theGridAccelerator);
 | 
					  Word *d_sum =(Word *)cl::sycl::malloc_shared(sizeof(Word),*theGridAccelerator);
 | 
				
			||||||
  Double identity;  zeroit(identity);
 | 
					  Word identity;  identity=0;
 | 
				
			||||||
  theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
 | 
					  theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
 | 
				
			||||||
     auto Reduction = cl::sycl::reduction(d_sum,identity,std::plus<>());
 | 
					     auto Reduction = cl::sycl::reduction(d_sum,identity,std::bit_xor<>());
 | 
				
			||||||
     cgh.parallel_for(cl::sycl::range<1>{L},
 | 
					     cgh.parallel_for(cl::sycl::range<1>{L},
 | 
				
			||||||
		      Reduction,
 | 
							      Reduction,
 | 
				
			||||||
		      [=] (cl::sycl::id<1> index, auto &sum) {
 | 
							      [=] (cl::sycl::id<1> index, auto &sum) {
 | 
				
			||||||
	 sum +=vec[index];
 | 
						 sum ^=vec[index];
 | 
				
			||||||
     });
 | 
					     });
 | 
				
			||||||
   });
 | 
					   });
 | 
				
			||||||
  theGridAccelerator->wait();
 | 
					  theGridAccelerator->wait();
 | 
				
			||||||
  Double ret = d_sum[0];
 | 
					  Word ret = d_sum[0];
 | 
				
			||||||
  free(d_sum,*theGridAccelerator);
 | 
					  free(d_sum,*theGridAccelerator);
 | 
				
			||||||
  std::cout << " svm_reduce finished "<<L<<" sites sum = " << ret <<std::endl;
 | 
					 | 
				
			||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <class vobj>
 | 
					template <class vobj>
 | 
				
			||||||
inline typename vobj::scalar_objectD sumD_gpu_repack(const vobj *lat, Integer osites)
 | 
					inline typename vobj::scalar_objectD sumD_gpu_repack(const vobj *lat, Integer osites)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -411,7 +411,7 @@ public:
 | 
				
			|||||||
      std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
 | 
					      std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
 | 
				
			||||||
      SeedFixedIntegers(seeds);
 | 
					      SeedFixedIntegers(seeds);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  void SeedFixedIntegers(const std::vector<int> &seeds){
 | 
					  void SeedFixedIntegers(const std::vector<int> &seeds, int britney=0){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Everyone generates the same seed_seq based on input seeds
 | 
					    // Everyone generates the same seed_seq based on input seeds
 | 
				
			||||||
    CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
 | 
					    CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
 | 
				
			||||||
@@ -428,7 +428,6 @@ public:
 | 
				
			|||||||
    // MT implementation does not implement fast discard even though
 | 
					    // MT implementation does not implement fast discard even though
 | 
				
			||||||
    // in principle this is possible
 | 
					    // in principle this is possible
 | 
				
			||||||
    ////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////
 | 
				
			||||||
#if 1
 | 
					 | 
				
			||||||
    thread_for( lidx, _grid->lSites(), {
 | 
					    thread_for( lidx, _grid->lSites(), {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int gidx;
 | 
						int gidx;
 | 
				
			||||||
@@ -449,29 +448,12 @@ public:
 | 
				
			|||||||
	
 | 
						
 | 
				
			||||||
	int l_idx=generator_idx(o_idx,i_idx);
 | 
						int l_idx=generator_idx(o_idx,i_idx);
 | 
				
			||||||
	_generators[l_idx] = master_engine;
 | 
						_generators[l_idx] = master_engine;
 | 
				
			||||||
	Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
 | 
						if ( britney ) { 
 | 
				
			||||||
    });
 | 
						  Skip(_generators[l_idx],l_idx); // Skip to next RNG sequence
 | 
				
			||||||
#else
 | 
						} else { 	
 | 
				
			||||||
    // Everybody loops over global volume.
 | 
					 | 
				
			||||||
    thread_for( gidx, _grid->_gsites, {
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// Where is it?
 | 
					 | 
				
			||||||
	int rank;
 | 
					 | 
				
			||||||
	int o_idx;
 | 
					 | 
				
			||||||
	int i_idx;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	Coordinate gcoor;
 | 
					 | 
				
			||||||
	_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
 | 
					 | 
				
			||||||
	_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	// If this is one of mine we take it
 | 
					 | 
				
			||||||
	if( rank == _grid->ThisRank() ){
 | 
					 | 
				
			||||||
	  int l_idx=generator_idx(o_idx,i_idx);
 | 
					 | 
				
			||||||
	  _generators[l_idx] = master_engine;
 | 
					 | 
				
			||||||
	  Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
 | 
						  Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#else 
 | 
					#else 
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////
 | 
				
			||||||
    // Machine and thread decomposition dependent seeding is efficient
 | 
					    // Machine and thread decomposition dependent seeding is efficient
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										213
									
								
								Grid/lattice/Lattice_slicesum_core.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										213
									
								
								Grid/lattice/Lattice_slicesum_core.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,213 @@
 | 
				
			|||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					#include <type_traits>
 | 
				
			||||||
 | 
					#if defined(GRID_CUDA)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cub/cub.cuh>
 | 
				
			||||||
 | 
					#define gpucub cub
 | 
				
			||||||
 | 
					#define gpuError_t cudaError_t
 | 
				
			||||||
 | 
					#define gpuSuccess cudaSuccess
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#elif defined(GRID_HIP)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <hipcub/hipcub.hpp>
 | 
				
			||||||
 | 
					#define gpucub hipcub
 | 
				
			||||||
 | 
					#define gpuError_t hipError_t
 | 
				
			||||||
 | 
					#define gpuSuccess hipSuccess
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(GRID_CUDA) || defined(GRID_HIP)
 | 
				
			||||||
 | 
					template<class vobj> inline void sliceSumReduction_cub_small(const vobj *Data, Vector<vobj> &lvSum, const int rd, const int e1, const int e2, const int stride, const int ostride, const int Nsimd) {
 | 
				
			||||||
 | 
					  size_t subvol_size = e1*e2;
 | 
				
			||||||
 | 
					  commVector<vobj> reduction_buffer(rd*subvol_size);
 | 
				
			||||||
 | 
					  auto rb_p = &reduction_buffer[0];
 | 
				
			||||||
 | 
					  vobj zero_init;
 | 
				
			||||||
 | 
					  zeroit(zero_init);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  void *temp_storage_array = NULL;
 | 
				
			||||||
 | 
					  size_t temp_storage_bytes = 0;
 | 
				
			||||||
 | 
					  vobj *d_out;
 | 
				
			||||||
 | 
					  int* d_offsets;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<int> offsets(rd+1,0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int i = 0; i < offsets.size(); i++) {
 | 
				
			||||||
 | 
					    offsets[i] = i*subvol_size;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //Allocate memory for output and offset arrays on device
 | 
				
			||||||
 | 
					  d_out = static_cast<vobj*>(acceleratorAllocDevice(rd*sizeof(vobj)));
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  d_offsets = static_cast<int*>(acceleratorAllocDevice((rd+1)*sizeof(int)));
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //copy offsets to device
 | 
				
			||||||
 | 
					  acceleratorCopyToDeviceAsync(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  gpuError_t gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p,d_out, rd, d_offsets, d_offsets+1, ::gpucub::Sum(), zero_init, computeStream);
 | 
				
			||||||
 | 
					  if (gpuErr!=gpuSuccess) {
 | 
				
			||||||
 | 
					    std::cout << GridLogError << "Lattice_slicesum_gpu.h: Encountered error during gpucub::DeviceSegmentedReduce::Reduce (setup)! Error: " << gpuErr <<std::endl;
 | 
				
			||||||
 | 
					    exit(EXIT_FAILURE);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //allocate memory for temp_storage_array  
 | 
				
			||||||
 | 
					  temp_storage_array = acceleratorAllocDevice(temp_storage_bytes);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //prepare buffer for reduction
 | 
				
			||||||
 | 
					  //use non-blocking accelerator_for to avoid syncs (ok because we submit to same computeStream)
 | 
				
			||||||
 | 
					  //use 2d accelerator_for to avoid launch latencies found when serially looping over rd 
 | 
				
			||||||
 | 
					  accelerator_for2dNB( s,subvol_size, r,rd, Nsimd,{ 
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					    int n = s / e2;
 | 
				
			||||||
 | 
					    int b = s % e2;
 | 
				
			||||||
 | 
					    int so=r*ostride; // base offset for start of plane 
 | 
				
			||||||
 | 
					    int ss= so+n*stride+b;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    coalescedWrite(rb_p[r*subvol_size+s], coalescedRead(Data[ss]));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //issue segmented reductions in computeStream
 | 
				
			||||||
 | 
					  gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p, d_out, rd, d_offsets, d_offsets+1,::gpucub::Sum(), zero_init, computeStream);
 | 
				
			||||||
 | 
					  if (gpuErr!=gpuSuccess) {
 | 
				
			||||||
 | 
					    std::cout << GridLogError << "Lattice_slicesum_gpu.h: Encountered error during gpucub::DeviceSegmentedReduce::Reduce! Error: " << gpuErr <<std::endl;
 | 
				
			||||||
 | 
					    exit(EXIT_FAILURE);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  acceleratorCopyFromDeviceAsync(d_out,&lvSum[0],rd*sizeof(vobj),computeStream);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //sync after copy
 | 
				
			||||||
 | 
					  accelerator_barrier();
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  acceleratorFreeDevice(temp_storage_array);
 | 
				
			||||||
 | 
					  acceleratorFreeDevice(d_out);
 | 
				
			||||||
 | 
					  acceleratorFreeDevice(d_offsets);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj> inline void sliceSumReduction_cub_large(const vobj *Data, Vector<vobj> &lvSum, const int rd, const int e1, const int e2, const int stride, const int ostride, const int Nsimd) {
 | 
				
			||||||
 | 
					  typedef typename vobj::vector_type vector;
 | 
				
			||||||
 | 
					  const int words = sizeof(vobj)/sizeof(vector);
 | 
				
			||||||
 | 
					  const int osites = rd*e1*e2;
 | 
				
			||||||
 | 
					  commVector<vector>buffer(osites);
 | 
				
			||||||
 | 
					  vector *dat = (vector *)Data;
 | 
				
			||||||
 | 
					  vector *buf = &buffer[0];
 | 
				
			||||||
 | 
					  Vector<vector> lvSum_small(rd);
 | 
				
			||||||
 | 
					  vector *lvSum_ptr = (vector *)&lvSum[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int w = 0; w < words; w++) {
 | 
				
			||||||
 | 
					    accelerator_for(ss,osites,1,{
 | 
				
			||||||
 | 
						    buf[ss] = dat[ss*words+w];
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    sliceSumReduction_cub_small(buf,lvSum_small,rd,e1,e2,stride, ostride,Nsimd);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					    for (int r = 0; r < rd; r++) {
 | 
				
			||||||
 | 
					      lvSum_ptr[w+words*r]=lvSum_small[r];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj> inline void sliceSumReduction_cub(const Lattice<vobj> &Data, Vector<vobj> &lvSum, const int rd, const int e1, const int e2, const int stride, const int ostride, const int Nsimd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  autoView(Data_v, Data, AcceleratorRead); //hipcub/cub cannot deal with large vobjs so we split into small/large case.
 | 
				
			||||||
 | 
					    if constexpr (sizeof(vobj) <= 256) { 
 | 
				
			||||||
 | 
					      sliceSumReduction_cub_small(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else {
 | 
				
			||||||
 | 
					      sliceSumReduction_cub_large(&Data_v[0], lvSum, rd, e1, e2, stride, ostride, Nsimd);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(GRID_SYCL)
 | 
				
			||||||
 | 
					template<class vobj> inline void sliceSumReduction_sycl(const Lattice<vobj> &Data, Vector <vobj> &lvSum, const int  &rd, const int &e1, const int &e2, const int &stride, const int &ostride, const int &Nsimd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef typename vobj::scalar_object sobj;
 | 
				
			||||||
 | 
					  size_t subvol_size = e1*e2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  vobj *mysum = (vobj *) malloc_shared(sizeof(vobj),*theGridAccelerator);
 | 
				
			||||||
 | 
					  vobj vobj_zero;
 | 
				
			||||||
 | 
					  zeroit(vobj_zero);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  commVector<vobj> reduction_buffer(rd*subvol_size);    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  auto rb_p = &reduction_buffer[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  autoView(Data_v, Data, AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //prepare reduction buffer 
 | 
				
			||||||
 | 
					  accelerator_for2d( s,subvol_size, r,rd, (size_t)Nsimd,{ 
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					      int n = s / e2;
 | 
				
			||||||
 | 
					      int b = s % e2;
 | 
				
			||||||
 | 
					      int so=r*ostride; // base offset for start of plane 
 | 
				
			||||||
 | 
					      int ss= so+n*stride+b;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      coalescedWrite(rb_p[r*subvol_size+s], coalescedRead(Data_v[ss]));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int r = 0; r < rd; r++) {
 | 
				
			||||||
 | 
					      mysum[0] = vobj_zero; //dirty hack: cannot pass vobj_zero as identity to sycl::reduction as its not device_copyable
 | 
				
			||||||
 | 
					      theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
 | 
				
			||||||
 | 
					          auto Reduction = cl::sycl::reduction(mysum,std::plus<>());
 | 
				
			||||||
 | 
					          cgh.parallel_for(cl::sycl::range<1>{subvol_size},
 | 
				
			||||||
 | 
					          Reduction,
 | 
				
			||||||
 | 
					          [=](cl::sycl::id<1> item, auto &sum) {
 | 
				
			||||||
 | 
					              auto s = item[0];
 | 
				
			||||||
 | 
					              sum += rb_p[r*subvol_size+s];
 | 
				
			||||||
 | 
					          });
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					      theGridAccelerator->wait();
 | 
				
			||||||
 | 
					      lvSum[r] = mysum[0];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  free(mysum,*theGridAccelerator);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj> inline void sliceSumReduction_cpu(const Lattice<vobj> &Data, Vector<vobj> &lvSum, const int &rd, const int &e1, const int &e2, const int &stride, const int &ostride, const int &Nsimd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  // sum over reduced dimension planes, breaking out orthog dir
 | 
				
			||||||
 | 
					  // Parallel over orthog direction
 | 
				
			||||||
 | 
					  autoView( Data_v, Data, CpuRead);
 | 
				
			||||||
 | 
					  thread_for( r,rd, {
 | 
				
			||||||
 | 
					    int so=r*ostride; // base offset for start of plane 
 | 
				
			||||||
 | 
					    for(int n=0;n<e1;n++){
 | 
				
			||||||
 | 
					      for(int b=0;b<e2;b++){
 | 
				
			||||||
 | 
					        int ss= so+n*stride+b;
 | 
				
			||||||
 | 
					        lvSum[r]=lvSum[r]+Data_v[ss];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj> inline void sliceSumReduction(const Lattice<vobj> &Data, Vector<vobj> &lvSum, const int &rd, const int &e1, const int &e2, const int &stride, const int &ostride, const int &Nsimd) 
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  #if defined(GRID_CUDA) || defined(GRID_HIP)
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  sliceSumReduction_cub(Data, lvSum, rd, e1, e2, stride, ostride, Nsimd);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  #elif defined(GRID_SYCL)
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  sliceSumReduction_sycl(Data, lvSum, rd, e1, e2, stride, ostride, Nsimd);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  #else
 | 
				
			||||||
 | 
					  sliceSumReduction_cpu(Data, lvSum, rd, e1, e2, stride, ostride, Nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  #endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
@@ -80,7 +80,7 @@ public:
 | 
				
			|||||||
  accelerator_inline uint64_t end(void)   const { return this->_odata_size; };
 | 
					  accelerator_inline uint64_t end(void)   const { return this->_odata_size; };
 | 
				
			||||||
  accelerator_inline uint64_t size(void)  const { return this->_odata_size; };
 | 
					  accelerator_inline uint64_t size(void)  const { return this->_odata_size; };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){ }
 | 
					  LatticeView(const LatticeAccelerator<vobj> &refer_to_me) : LatticeAccelerator<vobj> (refer_to_me){}
 | 
				
			||||||
  LatticeView(const LatticeView<vobj> &refer_to_me) = default; // Trivially copyable
 | 
					  LatticeView(const LatticeView<vobj> &refer_to_me) = default; // Trivially copyable
 | 
				
			||||||
  LatticeView(const LatticeAccelerator<vobj> &refer_to_me,ViewMode mode) : LatticeAccelerator<vobj> (refer_to_me)
 | 
					  LatticeView(const LatticeAccelerator<vobj> &refer_to_me,ViewMode mode) : LatticeAccelerator<vobj> (refer_to_me)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -179,11 +179,11 @@ extern GridLogger GridLogSolver;
 | 
				
			|||||||
extern GridLogger GridLogError;
 | 
					extern GridLogger GridLogError;
 | 
				
			||||||
extern GridLogger GridLogWarning;
 | 
					extern GridLogger GridLogWarning;
 | 
				
			||||||
extern GridLogger GridLogMessage;
 | 
					extern GridLogger GridLogMessage;
 | 
				
			||||||
extern GridLogger GridLogDebug  ;
 | 
					extern GridLogger GridLogDebug;
 | 
				
			||||||
extern GridLogger GridLogPerformance;
 | 
					extern GridLogger GridLogPerformance;
 | 
				
			||||||
extern GridLogger GridLogDslash;
 | 
					extern GridLogger GridLogDslash;
 | 
				
			||||||
extern GridLogger GridLogIterative  ;
 | 
					extern GridLogger GridLogIterative;
 | 
				
			||||||
extern GridLogger GridLogIntegrator  ;
 | 
					extern GridLogger GridLogIntegrator;
 | 
				
			||||||
extern GridLogger GridLogHMC;
 | 
					extern GridLogger GridLogHMC;
 | 
				
			||||||
extern GridLogger GridLogMemory;
 | 
					extern GridLogger GridLogMemory;
 | 
				
			||||||
extern GridLogger GridLogTracing;
 | 
					extern GridLogger GridLogTracing;
 | 
				
			||||||
@@ -191,6 +191,41 @@ extern Colours    GridLogColours;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
std::string demangle(const char* name) ;
 | 
					std::string demangle(const char* name) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief stream every argument, in order, into one std::string */
					template<typename... Args>
					inline std::string sjoin(Args&&... args) noexcept {
					    std::ostringstream buffer;
					    // Comma fold: one insertion per argument, evaluated left to right.
					    // An empty argument list inserts nothing and yields "".
					    ((buffer << args), ...);
					    return buffer.str();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief make log messages work like python print */
 | 
				
			||||||
 | 
					template <typename... Args>
 | 
				
			||||||
 | 
					inline void Grid_log(Args&&... args) {
 | 
				
			||||||
 | 
					    std::string msg = sjoin(std::forward<Args>(args)...);
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << msg << std::endl;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief make warning messages work like python print */
 | 
				
			||||||
 | 
					template <typename... Args>
 | 
				
			||||||
 | 
					inline void Grid_warn(Args&&... args) {
 | 
				
			||||||
 | 
					    std::string msg = sjoin(std::forward<Args>(args)...);
 | 
				
			||||||
 | 
					    std::cout << "\033[33m" << GridLogWarning << msg << "\033[0m" << std::endl;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief make error messages work like python print */
 | 
				
			||||||
 | 
					template <typename... Args>
 | 
				
			||||||
 | 
					inline void Grid_error(Args&&... args) {
 | 
				
			||||||
 | 
					    std::string msg = sjoin(std::forward<Args>(args)...);
 | 
				
			||||||
 | 
					    std::cout << "\033[31m" << GridLogError << msg << "\033[0m" << std::endl;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief make pass messages work like python print */
 | 
				
			||||||
 | 
					template <typename... Args>
 | 
				
			||||||
 | 
					inline void Grid_pass(Args&&... args) {
 | 
				
			||||||
 | 
					    std::string msg = sjoin(std::forward<Args>(args)...);
 | 
				
			||||||
 | 
					    std::cout << "\033[32m" << GridLogMessage << msg << "\033[0m" << std::endl;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define _NBACKTRACE (256)
 | 
					#define _NBACKTRACE (256)
 | 
				
			||||||
extern void * Grid_backtrace_buffer[_NBACKTRACE];
 | 
					extern void * Grid_backtrace_buffer[_NBACKTRACE];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,7 +34,7 @@ class GridTracer {
 | 
				
			|||||||
};
 | 
					};
 | 
				
			||||||
inline void tracePush(const char *name) { roctxRangePushA(name); }
 | 
					inline void tracePush(const char *name) { roctxRangePushA(name); }
 | 
				
			||||||
inline void tracePop(const char *name) { roctxRangePop(); }
 | 
					inline void tracePop(const char *name) { roctxRangePop(); }
 | 
				
			||||||
inline int  traceStart(const char *name) { roctxRangeStart(name); }
 | 
					inline int  traceStart(const char *name) { return roctxRangeStart(name); }
 | 
				
			||||||
inline void traceStop(int ID) { roctxRangeStop(ID); }
 | 
					inline void traceStop(int ID) { roctxRangeStop(ID); }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -280,20 +280,16 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  if( interior && exterior ) { 
 | 
					  if( interior && exterior ) { 
 | 
				
			||||||
    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGeneric,1); return;}
 | 
					    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGeneric,1); return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					 | 
				
			||||||
    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1);    return;}
 | 
					    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1);    return;}
 | 
				
			||||||
 | 
					#ifndef GRID_CUDA
 | 
				
			||||||
    if (Opt == OptInlineAsm  ) {  ASM_CALL(DhopSiteAsm);     return;}
 | 
					    if (Opt == OptInlineAsm  ) {  ASM_CALL(DhopSiteAsm);     return;}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
  } else if( interior ) {
 | 
					  } else if( interior ) {
 | 
				
			||||||
    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGenericInt,1); return;}
 | 
					    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGenericInt,1); return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					 | 
				
			||||||
    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1);    return;}
 | 
					    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1);    return;}
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
  } else if( exterior ) { 
 | 
					  } else if( exterior ) { 
 | 
				
			||||||
    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGenericExt,1); return;}
 | 
					    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGenericExt,1); return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					 | 
				
			||||||
    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1);    return;}
 | 
					    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1);    return;}
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  assert(0 && " Kernel optimisation case not covered ");
 | 
					  assert(0 && " Kernel optimisation case not covered ");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -322,19 +318,13 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
 | 
				
			|||||||
  
 | 
					  
 | 
				
			||||||
  if( interior && exterior ) { 
 | 
					  if( interior && exterior ) { 
 | 
				
			||||||
    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGeneric,0); return;}
 | 
					    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGeneric,0); return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					 | 
				
			||||||
    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0);    return;}
 | 
					    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0);    return;}
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
  } else if( interior ) {
 | 
					  } else if( interior ) {
 | 
				
			||||||
    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGenericInt,0); return;}
 | 
					    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGenericInt,0); return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					 | 
				
			||||||
    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0);    return;}
 | 
					    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0);    return;}
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
  } else if( exterior ) { 
 | 
					  } else if( exterior ) { 
 | 
				
			||||||
    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGenericExt,0); return;}
 | 
					    if (Opt == OptGeneric    ) { KERNEL_CALL(DhopSiteGenericExt,0); return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					 | 
				
			||||||
    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0);    return;}
 | 
					    if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0);    return;}
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -462,6 +462,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st,  DoubledGaugeField
 | 
				
			|||||||
    autoView(st_v , st,AcceleratorRead);
 | 
					    autoView(st_v , st,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   if( interior && exterior ) {
 | 
					   if( interior && exterior ) {
 | 
				
			||||||
 | 
					     acceleratorFenceComputeStream();
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSite); return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSite); return;}
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite);    return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite);    return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					#ifndef GRID_CUDA
 | 
				
			||||||
@@ -495,6 +496,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st,  DoubledGaugeField
 | 
				
			|||||||
    autoView(st_v ,st,AcceleratorRead);
 | 
					    autoView(st_v ,st,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   if( interior && exterior ) {
 | 
					   if( interior && exterior ) {
 | 
				
			||||||
 | 
					     acceleratorFenceComputeStream();
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDag); return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDag); return;}
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag);    return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag);    return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					#ifndef GRID_CUDA
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -237,7 +237,7 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    for (int level = 0; level < as.size(); ++level) {
 | 
					    for (int level = 0; level < as.size(); ++level) {
 | 
				
			||||||
      int multiplier = as.at(level).multiplier;
 | 
					      int multiplier = as.at(level).multiplier;
 | 
				
			||||||
      ActionLevel<Field> * Level = new ActionLevel<Field>(multiplier);
 | 
					      ActionLevel<Field, RepresentationPolicy> * Level = new ActionLevel<Field, RepresentationPolicy>(multiplier);
 | 
				
			||||||
      Level->push_back(new EmptyAction<Field>); 
 | 
					      Level->push_back(new EmptyAction<Field>); 
 | 
				
			||||||
      LevelForces.push_back(*Level);
 | 
					      LevelForces.push_back(*Level);
 | 
				
			||||||
      // does it copy by value or reference??
 | 
					      // does it copy by value or reference??
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										389
									
								
								Grid/qcd/smearing/HISQSmearing.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										389
									
								
								Grid/qcd/smearing/HISQSmearing.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,389 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/qcd/smearing/HISQSmearing.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2023
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: D. A. Clarke <clarke.davida@gmail.com> 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					    @file HISQSmearing.h
 | 
				
			||||||
 | 
					    @brief Declares classes related to HISQ smearing 
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					#include <Grid/lattice/PaddedCell.h>
 | 
				
			||||||
 | 
					#include <Grid/stencil/GeneralLocalStencil.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO: find a way to fold this into the stencil header. need to access grid to get
 | 
				
			||||||
 | 
					// Nd, since you don't want to inherit from QCD.h
 | 
				
			||||||
 | 
					/*!  @brief append arbitrary shift path to shifts */
 | 
				
			||||||
 | 
					template<typename... Args>
 | 
				
			||||||
 | 
					void appendShift(std::vector<Coordinate>& shifts, int dir, Args... args) {
 | 
				
			||||||
 | 
					    Coordinate shift(Nd,0);
 | 
				
			||||||
 | 
					    generalShift(shift, dir, args...); 
 | 
				
			||||||
 | 
					    // push_back creates an element at the end of shifts and
 | 
				
			||||||
 | 
					    // assigns the data in the argument to it.
 | 
				
			||||||
 | 
					    shifts.push_back(shift);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief figure out the stencil index from mu and nu */
 | 
				
			||||||
 | 
					accelerator_inline int stencilIndex(int mu, int nu) {
 | 
				
			||||||
 | 
					    // Nshifts depends on how you built the stencil
 | 
				
			||||||
 | 
					    int Nshifts = 6;
 | 
				
			||||||
 | 
					    return Nshifts*nu + Nd*Nshifts*mu;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief structure holding the link treatment */
 | 
				
			||||||
 | 
					struct SmearingParameters{
 | 
				
			||||||
 | 
					    SmearingParameters(){}
 | 
				
			||||||
 | 
					    Real c_1;               // 1 link
 | 
				
			||||||
 | 
					    Real c_naik;            // Naik term
 | 
				
			||||||
 | 
					    Real c_3;               // 3 link
 | 
				
			||||||
 | 
					    Real c_5;               // 5 link
 | 
				
			||||||
 | 
					    Real c_7;               // 7 link
 | 
				
			||||||
 | 
					    Real c_lp;              // 5 link Lepage
 | 
				
			||||||
 | 
					    SmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) 
 | 
				
			||||||
 | 
					        : c_1(c1),
 | 
				
			||||||
 | 
					          c_naik(cnaik),
 | 
				
			||||||
 | 
					          c_3(c3),
 | 
				
			||||||
 | 
					          c_5(c5),
 | 
				
			||||||
 | 
					          c_7(c7),
 | 
				
			||||||
 | 
					          c_lp(clp){}
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief create fat links from link variables */
 | 
				
			||||||
 | 
					template<class Gimpl> 
 | 
				
			||||||
 | 
					class Smear_HISQ : public Gimpl {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					    GridCartesian* const _grid;
 | 
				
			||||||
 | 
					    SmearingParameters _linkTreatment;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    INHERIT_GIMPL_TYPES(Gimpl);
 | 
				
			||||||
 | 
					    typedef typename Gimpl::GaugeField     GF;
 | 
				
			||||||
 | 
					    typedef typename Gimpl::GaugeLinkField LF;
 | 
				
			||||||
 | 
					    typedef typename Gimpl::ComplexField   CF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Don't allow default values here.
 | 
				
			||||||
 | 
					    Smear_HISQ(GridCartesian* grid, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) 
 | 
				
			||||||
 | 
					        : _grid(grid), 
 | 
				
			||||||
 | 
					          _linkTreatment(c1,cnaik,c3,c5,c7,clp) {
 | 
				
			||||||
 | 
					        assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3");
 | 
				
			||||||
 | 
					        assert(Nd == 4 && "HISQ smearing only defined for Nd==4");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Allow to pass a pointer to a C-style, double array for MILC convenience
 | 
				
			||||||
 | 
					    Smear_HISQ(GridCartesian* grid, double* coeff) 
 | 
				
			||||||
 | 
					        : _grid(grid), 
 | 
				
			||||||
 | 
					          _linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) {
 | 
				
			||||||
 | 
					        assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3");
 | 
				
			||||||
 | 
					        assert(Nd == 4 && "HISQ smearing only defined for Nd==4");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Empty destructor body: in particular the grid pointer is not deleted here.
					    ~Smear_HISQ() {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Intent: OUT--u_smr, u_naik
 | 
				
			||||||
 | 
					    //          IN--u_thin
 | 
				
			||||||
 | 
					    void smear(GF& u_smr, GF& u_naik, GF& u_thin) const {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        SmearingParameters lt = this->_linkTreatment;
 | 
				
			||||||
 | 
					        auto grid = this->_grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Create a padded cell of extra padding depth=1 and fill the padding.
 | 
				
			||||||
 | 
					        int depth = 1;
 | 
				
			||||||
 | 
					        PaddedCell Ghost(depth,grid);
 | 
				
			||||||
 | 
					        GF Ughost = Ghost.Exchange(u_thin);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // This is where auxiliary N-link fields and the final smear will be stored. 
 | 
				
			||||||
 | 
					        GF Ughost_fat(Ughost.Grid());
 | 
				
			||||||
 | 
					        GF Ughost_3link(Ughost.Grid());
 | 
				
			||||||
 | 
					        GF Ughost_5linkA(Ughost.Grid());
 | 
				
			||||||
 | 
					        GF Ughost_5linkB(Ughost.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // mu-nu plane stencil. We allow mu==nu to make indexing the stencil easier,
 | 
				
			||||||
 | 
					        // but these entries will not be used. 
 | 
				
			||||||
 | 
					        std::vector<Coordinate> shifts;
 | 
				
			||||||
 | 
					        for(int mu=0;mu<Nd;mu++)
 | 
				
			||||||
 | 
					        for(int nu=0;nu<Nd;nu++) {
 | 
				
			||||||
 | 
					            appendShift(shifts,mu);
 | 
				
			||||||
 | 
					            appendShift(shifts,nu);
 | 
				
			||||||
 | 
					            appendShift(shifts,shiftSignal::NO_SHIFT);
 | 
				
			||||||
 | 
					            appendShift(shifts,mu,Back(nu));
 | 
				
			||||||
 | 
					            appendShift(shifts,Back(nu));
 | 
				
			||||||
 | 
					            appendShift(shifts,Back(mu));
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // A GeneralLocalStencil has two indices: a site and stencil index 
 | 
				
			||||||
 | 
					        GeneralLocalStencil gStencil(Ughost.Grid(),shifts);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // This is where contributions from the smearing get added together
 | 
				
			||||||
 | 
					        Ughost_fat=Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // This loop handles 3-, 5-, and 7-link constructs, minus Lepage and Naik.
 | 
				
			||||||
 | 
					        for(int mu=0;mu<Nd;mu++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // TODO: This approach is slightly memory inefficient. It uses 25% extra memory 
 | 
				
			||||||
 | 
					            Ughost_3link =Zero();
 | 
				
			||||||
 | 
					            Ughost_5linkA=Zero();
 | 
				
			||||||
 | 
					            Ughost_5linkB=Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // Create the accessors
 | 
				
			||||||
 | 
					            autoView(U_v       , Ughost       , AcceleratorRead);
 | 
				
			||||||
 | 
					            autoView(U_fat_v   , Ughost_fat   , AcceleratorWrite);
 | 
				
			||||||
 | 
					            autoView(U_3link_v , Ughost_3link , AcceleratorWrite);
 | 
				
			||||||
 | 
					            autoView(U_5linkA_v, Ughost_5linkA, AcceleratorWrite);
 | 
				
			||||||
 | 
					            autoView(U_5linkB_v, Ughost_5linkB, AcceleratorWrite);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // We infer some types that will be needed in the calculation.
 | 
				
			||||||
 | 
					            typedef decltype(gStencil.GetEntry(0,0)) stencilElement;
 | 
				
			||||||
 | 
					            typedef decltype(coalescedReadGeneralPermute(U_v[0](0),gStencil.GetEntry(0,0)->_permute,Nd)) U3matrix;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            int Nsites = U_v.size();
 | 
				
			||||||
 | 
					            auto gStencil_v = gStencil.View(); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
 | 
				
			||||||
 | 
					                stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
 | 
				
			||||||
 | 
					                U3matrix U0, U1, U2, U3, U4, U5, W;
 | 
				
			||||||
 | 
					                for(int nu=0;nu<Nd;nu++) {
 | 
				
			||||||
 | 
					                    if(nu==mu) continue;
 | 
				
			||||||
 | 
					                    int s = stencilIndex(mu,nu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    // The stencil gives us support points in the mu-nu plane that we will use to
 | 
				
			||||||
 | 
					                    // grab the links we need.
 | 
				
			||||||
 | 
					                    SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu      = SE0->_offset;
 | 
				
			||||||
 | 
					                    SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu      = SE1->_offset;
 | 
				
			||||||
 | 
					                    SE2 = gStencil_v.GetEntry(s+2,site); int x           = SE2->_offset;
 | 
				
			||||||
 | 
					                    SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
 | 
				
			||||||
 | 
					                    SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu      = SE4->_offset;
 | 
				
			||||||
 | 
					                    SE5 = gStencil_v.GetEntry(s+5,site); int x_m_mu      = SE5->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    // When you're deciding whether to take an adjoint, the question is: how is the
 | 
				
			||||||
 | 
					                    // stored link oriented compared to the one you want? If I imagine myself travelling
 | 
				
			||||||
 | 
					                    // with the to-be-updated link, I have two possible, alternative 3-link paths I can
 | 
				
			||||||
 | 
					                    // take, one starting by going to the left, the other starting by going to the right.
 | 
				
			||||||
 | 
					                    U0 = coalescedReadGeneralPermute(U_v[x_p_mu     ](nu),SE0->_permute,Nd);
 | 
				
			||||||
 | 
					                    U1 = coalescedReadGeneralPermute(U_v[x_p_nu     ](mu),SE1->_permute,Nd);
 | 
				
			||||||
 | 
					                    U2 = coalescedReadGeneralPermute(U_v[x          ](nu),SE2->_permute,Nd);
 | 
				
			||||||
 | 
					                    U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd);
 | 
				
			||||||
 | 
					                    U4 = coalescedReadGeneralPermute(U_v[x_m_nu     ](mu),SE4->_permute,Nd);
 | 
				
			||||||
 | 
					                    U5 = coalescedReadGeneralPermute(U_v[x_m_nu     ](nu),SE4->_permute,Nd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    //  "left"          "right"
 | 
				
			||||||
 | 
					                    W = U2*U1*adj(U0) + adj(U5)*U4*U3;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    // Save 3-link construct for later and add to smeared field.
 | 
				
			||||||
 | 
					                    coalescedWrite(U_3link_v[x](nu), W);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    // The index operator (x) returns the coalesced read on GPU. The view [] index returns 
 | 
				
			||||||
 | 
					                    // a reference to the vector object. The [x](mu) returns a reference to the densely 
 | 
				
			||||||
 | 
					                    // packed (contiguous in memory) mu-th element of the vector object. On CPU, 
 | 
				
			||||||
 | 
					                    // coalescedRead/Write is the identity mapping assigning vector object to vector object.
 | 
				
			||||||
 | 
					                    // But on GPU it's non-trivial and maps scalar object to vector object and vice versa.
 | 
				
			||||||
 | 
					                    coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link 
 | 
				
			||||||
 | 
					                stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
 | 
				
			||||||
 | 
					                U3matrix U0, U1, U2, U3, U4, U5, W;
 | 
				
			||||||
 | 
					                int sigmaIndex = 0;
 | 
				
			||||||
 | 
					                for(int nu=0;nu<Nd;nu++) {
 | 
				
			||||||
 | 
					                    if(nu==mu) continue;
 | 
				
			||||||
 | 
					                    int s = stencilIndex(mu,nu);
 | 
				
			||||||
 | 
					                    for(int rho=0;rho<Nd;rho++) {
 | 
				
			||||||
 | 
					                        if (rho == mu || rho == nu) continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu      = SE0->_offset;
 | 
				
			||||||
 | 
					                        SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu      = SE1->_offset;
 | 
				
			||||||
 | 
					                        SE2 = gStencil_v.GetEntry(s+2,site); int x           = SE2->_offset;
 | 
				
			||||||
 | 
					                        SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
 | 
				
			||||||
 | 
					                        SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu      = SE4->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        U0 = coalescedReadGeneralPermute(      U_v[x_p_mu     ](nu ),SE0->_permute,Nd);
 | 
				
			||||||
 | 
					                        U1 = coalescedReadGeneralPermute(U_3link_v[x_p_nu     ](rho),SE1->_permute,Nd);
 | 
				
			||||||
 | 
					                        U2 = coalescedReadGeneralPermute(      U_v[x          ](nu ),SE2->_permute,Nd);
 | 
				
			||||||
 | 
					                        U3 = coalescedReadGeneralPermute(      U_v[x_p_mu_m_nu](nu ),SE3->_permute,Nd);
 | 
				
			||||||
 | 
					                        U4 = coalescedReadGeneralPermute(U_3link_v[x_m_nu     ](rho),SE4->_permute,Nd);
 | 
				
			||||||
 | 
					                        U5 = coalescedReadGeneralPermute(      U_v[x_m_nu     ](nu ),SE4->_permute,Nd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        W  = U2*U1*adj(U0) + adj(U5)*U4*U3;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        if(sigmaIndex<3) {
 | 
				
			||||||
 | 
					                            coalescedWrite(U_5linkA_v[x](rho), W);
 | 
				
			||||||
 | 
					                        } else {
 | 
				
			||||||
 | 
					                            coalescedWrite(U_5linkB_v[x](rho), W);
 | 
				
			||||||
 | 
					                        }    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W);
 | 
				
			||||||
 | 
					                        sigmaIndex++;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
 | 
				
			||||||
 | 
					                stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
 | 
				
			||||||
 | 
					                U3matrix U0, U1, U2, U3, U4, U5, W;
 | 
				
			||||||
 | 
					                int sigmaIndex = 0;
 | 
				
			||||||
 | 
					                for(int nu=0;nu<Nd;nu++) {
 | 
				
			||||||
 | 
					                    if(nu==mu) continue;
 | 
				
			||||||
 | 
					                    int s = stencilIndex(mu,nu);
 | 
				
			||||||
 | 
					                    for(int rho=0;rho<Nd;rho++) {
 | 
				
			||||||
 | 
					                        if (rho == mu || rho == nu) continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu      = SE0->_offset;
 | 
				
			||||||
 | 
					                        SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu      = SE1->_offset;
 | 
				
			||||||
 | 
					                        SE2 = gStencil_v.GetEntry(s+2,site); int x           = SE2->_offset;
 | 
				
			||||||
 | 
					                        SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
 | 
				
			||||||
 | 
					                        SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu      = SE4->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        U0 = coalescedReadGeneralPermute(U_v[x_p_mu](nu),SE0->_permute,Nd);
 | 
				
			||||||
 | 
					                        if(sigmaIndex<3) {
 | 
				
			||||||
 | 
					                            U1 = coalescedReadGeneralPermute(U_5linkB_v[x_p_nu](rho),SE1->_permute,Nd);
 | 
				
			||||||
 | 
					                        } else {
 | 
				
			||||||
 | 
					                            U1 = coalescedReadGeneralPermute(U_5linkA_v[x_p_nu](rho),SE1->_permute,Nd);
 | 
				
			||||||
 | 
					                        }  
 | 
				
			||||||
 | 
					                        U2 = coalescedReadGeneralPermute(U_v[x](nu),SE2->_permute,Nd);
 | 
				
			||||||
 | 
					                        U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd);
 | 
				
			||||||
 | 
					                        if(sigmaIndex<3) {
 | 
				
			||||||
 | 
					                            U4 = coalescedReadGeneralPermute(U_5linkB_v[x_m_nu](rho),SE4->_permute,Nd);
 | 
				
			||||||
 | 
					                        } else {
 | 
				
			||||||
 | 
					                            U4 = coalescedReadGeneralPermute(U_5linkA_v[x_m_nu](rho),SE4->_permute,Nd);
 | 
				
			||||||
 | 
					                        }  
 | 
				
			||||||
 | 
					                        U5 = coalescedReadGeneralPermute(U_v[x_m_nu](nu),SE4->_permute,Nd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        W  = U2*U1*adj(U0) + adj(U5)*U4*U3;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_7*W);
 | 
				
			||||||
 | 
					                        sigmaIndex++;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        } // end mu loop
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // c1, c3, c5, c7 construct contributions
 | 
				
			||||||
 | 
					        u_smr = Ghost.Extract(Ughost_fat) + lt.c_1*u_thin;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Load up U and V std::vectors to access thin and smeared links.
 | 
				
			||||||
 | 
					        std::vector<LF> U(Nd, grid);
 | 
				
			||||||
 | 
					        std::vector<LF> V(Nd, grid);
 | 
				
			||||||
 | 
					        std::vector<LF> Vnaik(Nd, grid);
 | 
				
			||||||
 | 
					        for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
					            U[mu] = PeekIndex<LorentzIndex>(u_thin, mu);
 | 
				
			||||||
 | 
					            V[mu] = PeekIndex<LorentzIndex>(u_smr, mu);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int mu=0;mu<Nd;mu++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // Naik
 | 
				
			||||||
 | 
					            Vnaik[mu] = lt.c_naik*Gimpl::CovShiftForward(U[mu],mu,
 | 
				
			||||||
 | 
					                                    Gimpl::CovShiftForward(U[mu],mu,
 | 
				
			||||||
 | 
					                                      Gimpl::CovShiftIdentityForward(U[mu],mu)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            // LePage
 | 
				
			||||||
 | 
					            for (int nu_h=1;nu_h<Nd;nu_h++) {
 | 
				
			||||||
 | 
					                int nu=(mu+nu_h)%Nd;
 | 
				
			||||||
 | 
					                                // nu, nu, mu, Back(nu), Back(nu)
 | 
				
			||||||
 | 
					                V[mu] = V[mu] + lt.c_lp*Gimpl::CovShiftForward(U[nu],nu,
 | 
				
			||||||
 | 
					                                          Gimpl::CovShiftForward(U[nu],nu,
 | 
				
			||||||
 | 
					                                            Gimpl::CovShiftForward(U[mu],mu,
 | 
				
			||||||
 | 
					                                              Gimpl::CovShiftBackward(U[nu],nu,
 | 
				
			||||||
 | 
					                                                Gimpl::CovShiftIdentityBackward(U[nu],nu)))))
 | 
				
			||||||
 | 
					                                // Back(nu), Back(nu), mu, nu, nu
 | 
				
			||||||
 | 
					                              + lt.c_lp*Gimpl::CovShiftBackward(U[nu],nu,
 | 
				
			||||||
 | 
					                                          Gimpl::CovShiftBackward(U[nu],nu,
 | 
				
			||||||
 | 
					                                            Gimpl::CovShiftForward(U[mu],mu,
 | 
				
			||||||
 | 
					                                              Gimpl::CovShiftForward(U[nu],nu,
 | 
				
			||||||
 | 
					                                                Gimpl::CovShiftIdentityForward(U[nu],nu)))));
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Put V back into u_smr.
 | 
				
			||||||
 | 
					        for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
					            PokeIndex<LorentzIndex>(u_smr , V[mu]    , mu);
 | 
				
			||||||
 | 
					            PokeIndex<LorentzIndex>(u_naik, Vnaik[mu], mu);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Intent: OUT--u_proj
 | 
				
			||||||
 | 
					    //          IN--u_mu
 | 
				
			||||||
 | 
					    /*! @brief Build u_proj from u_mu, one Lorentz direction at a time.
					     *
					     *  For every direction mu the link V is peeked out of u_mu,
					     *  Q = adj(V)*V is formed, and V*sqrtQinv is poked into u_proj, where
					     *  sqrtQinv = f0 + f1*Q + f2*Q*Q. The coefficients f0, f1, f2 are built
					     *  from g0, g1, g2, which in turn come from the traces of Q, Q^2 and Q^3.
					     *  The formulas follow MILC 10.1103/PhysRevD.82.074501,
					     *  eqs (B2-B3) and (C1-C8).
					     *
					     *  @param u_proj  OUT: receives V*sqrtQinv for each mu
					     *  @param u_mu    IN:  links to be projected (only read here)
					     */
					    void projectU3(GF& u_proj, GF& u_mu) const {

					        auto grid = this->_grid;

					        LF V(grid), Q(grid), sqrtQinv(grid), id_3(grid);
					        CF c0(grid), c1(grid), c2(grid), g0(grid), g1(grid), g2(grid), S(grid), R(grid), theta(grid),
					           u(grid), v(grid), w(grid), den(grid), f0(grid), f1(grid), f2(grid);

					        // Constant term multiplying f0 below; it does not depend on mu, so set it once.
					        id_3 = 1.;

					        // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8)
					        for (int mu = 0; mu < Nd; mu++) {
					            V  = PeekIndex<LorentzIndex>(u_mu, mu);
					            Q  = adj(V)*V;

					            // Trace-based coefficients of Q.
					            c0 =        real(trace(Q));
					            c1 = (1/2.)*real(trace(Q*Q));
					            c2 = (1/3.)*real(trace(Q*Q*Q));
					            S  = (1/3.)*c1-(1/18.)*c0*c0;

					            // NOTE(review): norm2(S) looks like a single number for the whole field,
					            // so this branch is chosen for all sites at once rather than site by
					            // site -- confirm this is the intended handling of small S.
					            if (norm2(S)<1e-28) {
					                g0 = (1/3.)*c0; g1 = g0; g2 = g1;
					            } else {
					                R     = (1/2.)*c2-(1/3. )*c0*c1+(1/27.)*c0*c0*c0;
					                theta = acos(R*pow(S,-1.5));
					                g0    = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta-2*M_PI/3.);
					                g1    = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta          );
					                g2    = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta+2*M_PI/3.);
					            }

					            // TODO: fall back to an SVD when Q.determinant()/(g0*g1*g2) differs
					            //       from 1 by more than 1e-5 (not implemented).

					            // Symmetric combinations of sqrt(g_i) entering f0, f1, f2.
					            u     = sqrt(g0) + sqrt(g1) + sqrt(g2);
					            v     = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2);
					            w     = sqrt(g0*g1*g2);
					            den   = w*(u*v-w);
					            f0    = (-w*(u*u+v)+u*v*v)/den;
					            f1    = (-w-u*u*u+2.*u*v)/den;
					            f2    = u/den;

					            sqrtQinv = f0*id_3 + f1*Q + f2*Q*Q;

					            PokeIndex<LorentzIndex>(u_proj, V*sqrtQinv, mu);
					        }
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//    void derivative(const GaugeField& Gauge) const {
 | 
				
			||||||
 | 
					//    };
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
@@ -5,4 +5,5 @@
 | 
				
			|||||||
#include <Grid/qcd/smearing/StoutSmearing.h>
 | 
					#include <Grid/qcd/smearing/StoutSmearing.h>
 | 
				
			||||||
#include <Grid/qcd/smearing/GaugeConfiguration.h>
 | 
					#include <Grid/qcd/smearing/GaugeConfiguration.h>
 | 
				
			||||||
#include <Grid/qcd/smearing/WilsonFlow.h>
 | 
					#include <Grid/qcd/smearing/WilsonFlow.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/smearing/HISQSmearing.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1133,4 +1133,13 @@ static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths inc
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
					// Declare the Grid SIMD vector types as device-copyable for SYCL builds.
					template<> struct sycl::is_device_copyable<Grid::vComplexF> : std::true_type {};
					template<> struct sycl::is_device_copyable<Grid::vComplexD> : std::true_type {};
					template<> struct sycl::is_device_copyable<Grid::vRealF>    : std::true_type {};
					template<> struct sycl::is_device_copyable<Grid::vRealD>    : std::true_type {};
					template<> struct sycl::is_device_copyable<Grid::vInteger>  : std::true_type {};
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -137,5 +137,55 @@ public:
 | 
				
			|||||||
  
 | 
					  
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Some machinery to streamline making a stencil 
 | 
				
			||||||
 | 
					////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief integer tags used to encode a shift request in a single int */
					struct shiftSignal {
					    enum {
					        NO_SHIFT       = -1,  // request no displacement at all
					        BACKWARD_CONST = 16   // offset added to a direction to mark it as backward
					    };
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO: put a check somewhere that BACKWARD_CONST > Nd!
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief signals that you want to go backwards in direction dir */
 | 
				
			||||||
 | 
					inline int Back(const int dir) {
 | 
				
			||||||
 | 
					    // generalShift will use BACKWARD_CONST to determine whether we step forward or 
 | 
				
			||||||
 | 
					    // backward. Trick inspired by SIMULATeQCD. 
 | 
				
			||||||
 | 
					    return dir + shiftSignal::BACKWARD_CONST;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief shift one unit in direction dir */
 | 
				
			||||||
 | 
					template<typename... Args>
 | 
				
			||||||
 | 
					void generalShift(Coordinate& shift, int dir) {
 | 
				
			||||||
 | 
					    if (dir >= shiftSignal::BACKWARD_CONST) {
 | 
				
			||||||
 | 
					        dir -= shiftSignal::BACKWARD_CONST;
 | 
				
			||||||
 | 
					        shift[dir]+=-1;
 | 
				
			||||||
 | 
					    } else if (dir == shiftSignal::NO_SHIFT) {
 | 
				
			||||||
 | 
					        ; // do nothing
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        shift[dir]+=1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief follow a path of directions, shifting one unit in each direction */
 | 
				
			||||||
 | 
					template<typename... Args>
 | 
				
			||||||
 | 
					void generalShift(Coordinate& shift, int dir, Args... args) {
 | 
				
			||||||
 | 
					    if (dir >= shiftSignal::BACKWARD_CONST) {
 | 
				
			||||||
 | 
					        dir -= shiftSignal::BACKWARD_CONST;
 | 
				
			||||||
 | 
					        shift[dir]+=-1;
 | 
				
			||||||
 | 
					    } else if (dir == shiftSignal::NO_SHIFT) {
 | 
				
			||||||
 | 
					        ; // do nothing
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        shift[dir]+=1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    generalShift(shift, args...);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -70,57 +70,6 @@ struct DefaultImplParams {
 | 
				
			|||||||
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
 | 
					void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
 | 
				
			||||||
				 int off,std::vector<std::pair<int,int> > & table);
 | 
									 int off,std::vector<std::pair<int,int> > & table);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
template<class vobj,class cobj,class compressor>
 | 
					 | 
				
			||||||
void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so)   __attribute__((noinline));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class vobj,class cobj,class compressor>
 | 
					 | 
				
			||||||
void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  int num=table.size();
 | 
					 | 
				
			||||||
  std::pair<int,int> *table_v = & table[0];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  auto rhs_v = rhs.View(AcceleratorRead);
 | 
					 | 
				
			||||||
  accelerator_forNB( i,num, vobj::Nsimd(), {
 | 
					 | 
				
			||||||
    compress.Compress(buffer[off+table_v[i].first],rhs_v[so+table_v[i].second]);
 | 
					 | 
				
			||||||
  });
 | 
					 | 
				
			||||||
  rhs_v.ViewClose();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
///////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
// Gather for when there *is* need to SIMD split with compression
 | 
					 | 
				
			||||||
///////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
template<class cobj,class vobj,class compressor>
 | 
					 | 
				
			||||||
void Gather_plane_exchange_table(const Lattice<vobj> &rhs,
 | 
					 | 
				
			||||||
				 commVector<cobj *> pointers,
 | 
					 | 
				
			||||||
				 int dimension,int plane,
 | 
					 | 
				
			||||||
				 int cbmask,compressor &compress,int type) __attribute__((noinline));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class cobj,class vobj,class compressor>
 | 
					 | 
				
			||||||
void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,
 | 
					 | 
				
			||||||
				 const Lattice<vobj> &rhs,
 | 
					 | 
				
			||||||
				 std::vector<cobj *> &pointers,int dimension,int plane,int cbmask,
 | 
					 | 
				
			||||||
				 compressor &compress,int type)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  assert( (table.size()&0x1)==0);
 | 
					 | 
				
			||||||
  int num=table.size()/2;
 | 
					 | 
				
			||||||
  int so  = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  auto rhs_v = rhs.View(AcceleratorRead);
 | 
					 | 
				
			||||||
  auto rhs_p = &rhs_v[0];
 | 
					 | 
				
			||||||
  auto p0=&pointers[0][0];
 | 
					 | 
				
			||||||
  auto p1=&pointers[1][0];
 | 
					 | 
				
			||||||
  auto tp=&table[0];
 | 
					 | 
				
			||||||
  accelerator_forNB(j, num, vobj::Nsimd(), {
 | 
					 | 
				
			||||||
      compress.CompressExchange(p0,p1, rhs_p, j,
 | 
					 | 
				
			||||||
				so+tp[2*j  ].second,
 | 
					 | 
				
			||||||
				so+tp[2*j+1].second,
 | 
					 | 
				
			||||||
				type);
 | 
					 | 
				
			||||||
  });
 | 
					 | 
				
			||||||
  rhs_v.ViewClose();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void DslashResetCounts(void);
 | 
					void DslashResetCounts(void);
 | 
				
			||||||
void DslashGetCounts(uint64_t &dirichlet,uint64_t &partial,uint64_t &full);
 | 
					void DslashGetCounts(uint64_t &dirichlet,uint64_t &partial,uint64_t &full);
 | 
				
			||||||
void DslashLogFull(void);
 | 
					void DslashLogFull(void);
 | 
				
			||||||
@@ -258,6 +207,10 @@ public:
 | 
				
			|||||||
  struct Packet {
 | 
					  struct Packet {
 | 
				
			||||||
    void * send_buf;
 | 
					    void * send_buf;
 | 
				
			||||||
    void * recv_buf;
 | 
					    void * recv_buf;
 | 
				
			||||||
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					    void * host_send_buf; // Allocate this if not MPI_CUDA_AWARE
 | 
				
			||||||
 | 
					    void * host_recv_buf; // Allocate this if not MPI_CUDA_AWARE
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    Integer to_rank;
 | 
					    Integer to_rank;
 | 
				
			||||||
    Integer from_rank;
 | 
					    Integer from_rank;
 | 
				
			||||||
    Integer do_send;
 | 
					    Integer do_send;
 | 
				
			||||||
@@ -324,7 +277,7 @@ public:
 | 
				
			|||||||
  Vector<int> surface_list;
 | 
					  Vector<int> surface_list;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  stencilVector<StencilEntry>  _entries; // Resident in managed memory
 | 
					  stencilVector<StencilEntry>  _entries; // Resident in managed memory
 | 
				
			||||||
  commVector<StencilEntry>     _entries_device; // Resident in managed memory
 | 
					  commVector<StencilEntry>     _entries_device; // Resident in device memory
 | 
				
			||||||
  std::vector<Packet> Packets;
 | 
					  std::vector<Packet> Packets;
 | 
				
			||||||
  std::vector<Merge> Mergers;
 | 
					  std::vector<Merge> Mergers;
 | 
				
			||||||
  std::vector<Merge> MergersSHM;
 | 
					  std::vector<Merge> MergersSHM;
 | 
				
			||||||
@@ -408,33 +361,16 @@ public:
 | 
				
			|||||||
  // Use OpenMP Tasks for cleaner ???
 | 
					  // Use OpenMP Tasks for cleaner ???
 | 
				
			||||||
  // must be called *inside* parallel region
 | 
					  // must be called *inside* parallel region
 | 
				
			||||||
  //////////////////////////////////////////
 | 
					  //////////////////////////////////////////
 | 
				
			||||||
  /*
 | 
					 | 
				
			||||||
  void CommunicateThreaded()
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
#ifdef GRID_OMP
 | 
					 | 
				
			||||||
    int mythread = omp_get_thread_num();
 | 
					 | 
				
			||||||
    int nthreads = CartesianCommunicator::nCommThreads;
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
    int mythread = 0;
 | 
					 | 
				
			||||||
    int nthreads = 1;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
    if (nthreads == -1) nthreads = 1;
 | 
					 | 
				
			||||||
    if (mythread < nthreads) {
 | 
					 | 
				
			||||||
      for (int i = mythread; i < Packets.size(); i += nthreads) {
 | 
					 | 
				
			||||||
	uint64_t bytes = _grid->StencilSendToRecvFrom(Packets[i].send_buf,
 | 
					 | 
				
			||||||
						      Packets[i].to_rank,
 | 
					 | 
				
			||||||
						      Packets[i].recv_buf,
 | 
					 | 
				
			||||||
						      Packets[i].from_rank,
 | 
					 | 
				
			||||||
						      Packets[i].bytes,i);
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  */
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Non blocking send and receive. Necessarily parallel.
 | 
					  // Non blocking send and receive. Necessarily parallel.
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
 | 
					  void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
 | 
					    // All GPU kernel tasks must complete
 | 
				
			||||||
 | 
					    //    accelerator_barrier();     // All kernels should ALREADY be complete
 | 
				
			||||||
 | 
					    //    _grid->StencilBarrier();   // Everyone is here, so noone running slow and still using receive buffer
 | 
				
			||||||
 | 
					                               // But the HaloGather had a barrier too.
 | 
				
			||||||
 | 
					#ifdef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
    for(int i=0;i<Packets.size();i++){
 | 
					    for(int i=0;i<Packets.size();i++){
 | 
				
			||||||
      _grid->StencilSendToRecvFromBegin(MpiReqs,
 | 
					      _grid->StencilSendToRecvFromBegin(MpiReqs,
 | 
				
			||||||
					Packets[i].send_buf,
 | 
										Packets[i].send_buf,
 | 
				
			||||||
@@ -443,16 +379,54 @@ public:
 | 
				
			|||||||
					Packets[i].from_rank,Packets[i].do_recv,
 | 
										Packets[i].from_rank,Packets[i].do_recv,
 | 
				
			||||||
					Packets[i].xbytes,Packets[i].rbytes,i);
 | 
										Packets[i].xbytes,Packets[i].rbytes,i);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#warning "Using COPY VIA HOST BUFFERS IN STENCIL"
 | 
				
			||||||
 | 
					    for(int i=0;i<Packets.size();i++){
 | 
				
			||||||
 | 
					      // Introduce a host buffer with a cheap slab allocator and zero cost wipe all
 | 
				
			||||||
 | 
					      Packets[i].host_send_buf = _grid->HostBufferMalloc(Packets[i].xbytes);
 | 
				
			||||||
 | 
					      Packets[i].host_recv_buf = _grid->HostBufferMalloc(Packets[i].rbytes);
 | 
				
			||||||
 | 
					      if ( Packets[i].do_send ) {
 | 
				
			||||||
 | 
						acceleratorCopyFromDevice(Packets[i].send_buf, Packets[i].host_send_buf,Packets[i].xbytes);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      _grid->StencilSendToRecvFromBegin(MpiReqs,
 | 
				
			||||||
 | 
										Packets[i].host_send_buf,
 | 
				
			||||||
 | 
										Packets[i].to_rank,Packets[i].do_send,
 | 
				
			||||||
 | 
										Packets[i].host_recv_buf,
 | 
				
			||||||
 | 
										Packets[i].from_rank,Packets[i].do_recv,
 | 
				
			||||||
 | 
										Packets[i].xbytes,Packets[i].rbytes,i);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    // Get comms started then run checksums
 | 
				
			||||||
 | 
					    // Having this PRIOR to the dslash seems to make Sunspot work... (!)
 | 
				
			||||||
 | 
					    for(int i=0;i<Packets.size();i++){
 | 
				
			||||||
 | 
					      if ( Packets[i].do_send )
 | 
				
			||||||
 | 
						FlightRecorder::xmitLog(Packets[i].send_buf,Packets[i].xbytes);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
 | 
					  void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    _grid->StencilSendToRecvFromComplete(MpiReqs,0);
 | 
					    _grid->StencilSendToRecvFromComplete(MpiReqs,0); // MPI is done
 | 
				
			||||||
    if   ( this->partialDirichlet ) DslashLogPartial();
 | 
					    if   ( this->partialDirichlet ) DslashLogPartial();
 | 
				
			||||||
    else if ( this->fullDirichlet ) DslashLogDirichlet();
 | 
					    else if ( this->fullDirichlet ) DslashLogDirichlet();
 | 
				
			||||||
    else DslashLogFull();
 | 
					    else DslashLogFull();
 | 
				
			||||||
    acceleratorCopySynchronise();
 | 
					    // acceleratorCopySynchronise() is in the StencilSendToRecvFromComplete
 | 
				
			||||||
 | 
					    //    accelerator_barrier(); 
 | 
				
			||||||
    _grid->StencilBarrier(); 
 | 
					    _grid->StencilBarrier(); 
 | 
				
			||||||
 | 
					#ifndef ACCELERATOR_AWARE_MPI
 | 
				
			||||||
 | 
					#warning "Using COPY VIA HOST BUFFERS IN STENCIL"
 | 
				
			||||||
 | 
					    for(int i=0;i<Packets.size();i++){
 | 
				
			||||||
 | 
					      if ( Packets[i].do_recv ) {
 | 
				
			||||||
 | 
						acceleratorCopyToDevice(Packets[i].host_recv_buf, Packets[i].recv_buf,Packets[i].rbytes);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    _grid->HostBufferFreeAll();
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    // run any checksums
 | 
				
			||||||
 | 
					    for(int i=0;i<Packets.size();i++){
 | 
				
			||||||
 | 
					      if ( Packets[i].do_recv )
 | 
				
			||||||
 | 
						FlightRecorder::recvLog(Packets[i].recv_buf,Packets[i].rbytes,Packets[i].from_rank);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Blocking send and receive. Either sequential or parallel.
 | 
					  // Blocking send and receive. Either sequential or parallel.
 | 
				
			||||||
@@ -528,6 +502,7 @@ public:
 | 
				
			|||||||
  template<class compressor>
 | 
					  template<class compressor>
 | 
				
			||||||
  void HaloGather(const Lattice<vobj> &source,compressor &compress)
 | 
					  void HaloGather(const Lattice<vobj> &source,compressor &compress)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
 | 
					    //    accelerator_barrier();
 | 
				
			||||||
    _grid->StencilBarrier();// Synch shared memory on a single nodes
 | 
					    _grid->StencilBarrier();// Synch shared memory on a single nodes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert(source.Grid()==_grid);
 | 
					    assert(source.Grid()==_grid);
 | 
				
			||||||
@@ -540,10 +515,9 @@ public:
 | 
				
			|||||||
      compress.Point(point);
 | 
					      compress.Point(point);
 | 
				
			||||||
      HaloGatherDir(source,compress,point,face_idx);
 | 
					      HaloGatherDir(source,compress,point,face_idx);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    accelerator_barrier();
 | 
					    accelerator_barrier(); // All my local gathers are complete
 | 
				
			||||||
    face_table_computed=1;
 | 
					    face_table_computed=1;
 | 
				
			||||||
    assert(u_comm_offset==_unified_buffer_size);
 | 
					    assert(u_comm_offset==_unified_buffer_size);
 | 
				
			||||||
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /////////////////////////
 | 
					  /////////////////////////
 | 
				
			||||||
@@ -579,6 +553,7 @@ public:
 | 
				
			|||||||
      accelerator_forNB(j, words, cobj::Nsimd(), {
 | 
					      accelerator_forNB(j, words, cobj::Nsimd(), {
 | 
				
			||||||
	  coalescedWrite(to[j] ,coalescedRead(from [j]));
 | 
						  coalescedWrite(to[j] ,coalescedRead(from [j]));
 | 
				
			||||||
      });
 | 
					      });
 | 
				
			||||||
 | 
					      acceleratorFenceComputeStream();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
@@ -669,6 +644,7 @@ public:
 | 
				
			|||||||
    for(int i=0;i<dd.size();i++){
 | 
					    for(int i=0;i<dd.size();i++){
 | 
				
			||||||
      decompressor::DecompressFace(decompress,dd[i]);
 | 
					      decompressor::DecompressFace(decompress,dd[i]);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    acceleratorFenceComputeStream(); // dependent kernels
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  ////////////////////////////////////////
 | 
					  ////////////////////////////////////////
 | 
				
			||||||
  // Set up routines
 | 
					  // Set up routines
 | 
				
			||||||
@@ -1224,7 +1200,6 @@ public:
 | 
				
			|||||||
	  ///////////////////////////////////////////////////////////
 | 
						  ///////////////////////////////////////////////////////////
 | 
				
			||||||
	  int do_send = (comms_send|comms_partial_send) && (!shm_send );
 | 
						  int do_send = (comms_send|comms_partial_send) && (!shm_send );
 | 
				
			||||||
	  int do_recv = (comms_send|comms_partial_send) && (!shm_recv );
 | 
						  int do_recv = (comms_send|comms_partial_send) && (!shm_recv );
 | 
				
			||||||
	  
 | 
					 | 
				
			||||||
	  AddPacket((void *)&send_buf[comm_off],
 | 
						  AddPacket((void *)&send_buf[comm_off],
 | 
				
			||||||
		    (void *)&recv_buf[comm_off],
 | 
							    (void *)&recv_buf[comm_off],
 | 
				
			||||||
		    xmit_to_rank, do_send,
 | 
							    xmit_to_rank, do_send,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -404,3 +404,12 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
  };
 | 
					  };
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
					// Mark every Grid tensor type that is not already trivially copyable
					// as device-copyable for SYCL builds.
					template<typename T>
					struct sycl::is_device_copyable<
					    T,
					    typename std::enable_if<Grid::isGridTensor<T>::value &&
					                            !std::is_trivially_copyable<T>::value>::type>
					  : std::true_type {};
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -16,10 +16,7 @@ void     acceleratorThreads(uint32_t t) {accelerator_threads = t;};
 | 
				
			|||||||
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
 | 
					#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
 | 
				
			||||||
#define ENV_RANK_MVAPICH       "MV2_COMM_WORLD_RANK"
 | 
					#define ENV_RANK_MVAPICH       "MV2_COMM_WORLD_RANK"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
// fold omptarget into device specific acceleratorInit()
 | 
					 | 
				
			||||||
#if defined(GRID_CUDA) || (defined(GRID_OMPTARGET) && defined(__CUDA_ARCH__))
 | 
					 | 
				
			||||||
#include <cuda_runtime_api.h>
 | 
					 | 
				
			||||||
cudaDeviceProp *gpu_props;
 | 
					cudaDeviceProp *gpu_props;
 | 
				
			||||||
cudaStream_t copyStream;
 | 
					cudaStream_t copyStream;
 | 
				
			||||||
cudaStream_t computeStream;
 | 
					cudaStream_t computeStream;
 | 
				
			||||||
@@ -118,7 +115,7 @@ void acceleratorInit(void)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(GRID_HIP) || (defined(GRID_OMPTARGET) && defined(__HIP_DEVICE_COMPILE__))
 | 
					#ifdef GRID_HIP
 | 
				
			||||||
hipDeviceProp_t *gpu_props;
 | 
					hipDeviceProp_t *gpu_props;
 | 
				
			||||||
hipStream_t copyStream;
 | 
					hipStream_t copyStream;
 | 
				
			||||||
hipStream_t computeStream;
 | 
					hipStream_t computeStream;
 | 
				
			||||||
@@ -203,7 +200,7 @@ void acceleratorInit(void)
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(GRID_SYCL) //|| (defined(GRID_OMPTARGET) && defined(__SYCL_DEVICE_ONLY__))
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cl::sycl::queue *theGridAccelerator;
 | 
					cl::sycl::queue *theGridAccelerator;
 | 
				
			||||||
cl::sycl::queue *theCopyAccelerator;
 | 
					cl::sycl::queue *theCopyAccelerator;
 | 
				
			||||||
@@ -286,7 +283,7 @@ void acceleratorInit(void)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP))// && (!defined(GRID_OMPTARGET))
 | 
					#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP))
 | 
				
			||||||
void acceleratorInit(void){}
 | 
					void acceleratorInit(void){}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -26,11 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
*************************************************************************************/
 | 
					*************************************************************************************/
 | 
				
			||||||
/*  END LEGAL */
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef ACCELERATOR_H
 | 
					 | 
				
			||||||
#define ACCELERATOR_H
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#pragma once
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <string.h>
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef HAVE_MALLOC_MALLOC_H
 | 
					#ifdef HAVE_MALLOC_MALLOC_H
 | 
				
			||||||
@@ -228,6 +225,8 @@ inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
 | 
				
			|||||||
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
 | 
					inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
 | 
				
			||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
 | 
					inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
 | 
				
			||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
 | 
					inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
 | 
				
			||||||
 | 
					inline void acceleratorCopyToDeviceAsync(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) { cudaMemcpyAsync(to,from,bytes, cudaMemcpyHostToDevice, stream);}
 | 
				
			||||||
 | 
					inline void acceleratorCopyFromDeviceAsync(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) { cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToHost, stream);}
 | 
				
			||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
 | 
					inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
 | 
				
			||||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
 | 
					inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -256,17 +255,13 @@ inline int  acceleratorIsCommunicable(void *ptr)
 | 
				
			|||||||
#define GRID_SYCL_LEVEL_ZERO_IPC
 | 
					#define GRID_SYCL_LEVEL_ZERO_IPC
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
#if 0
 | 
					
 | 
				
			||||||
#include <CL/sycl.hpp>
 | 
					// Force deterministic reductions
 | 
				
			||||||
#include <CL/sycl/usm.hpp>
 | 
					#define SYCL_REDUCTION_DETERMINISTIC
 | 
				
			||||||
#include <level_zero/ze_api.h>
 | 
					 | 
				
			||||||
#include <CL/sycl/backend/level_zero.hpp>
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
#include <sycl/CL/sycl.hpp>
 | 
					#include <sycl/CL/sycl.hpp>
 | 
				
			||||||
#include <sycl/usm.hpp>
 | 
					#include <sycl/usm.hpp>
 | 
				
			||||||
#include <level_zero/ze_api.h>
 | 
					#include <level_zero/ze_api.h>
 | 
				
			||||||
#include <sycl/ext/oneapi/backend/level_zero.hpp>
 | 
					#include <sycl/ext/oneapi/backend/level_zero.hpp>
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -446,6 +441,8 @@ inline void acceleratorFreeShared(void *ptr){ auto r=hipFree(ptr);};
 | 
				
			|||||||
inline void acceleratorFreeDevice(void *ptr){ auto r=hipFree(ptr);};
 | 
					inline void acceleratorFreeDevice(void *ptr){ auto r=hipFree(ptr);};
 | 
				
			||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { auto r=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
 | 
					inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { auto r=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
 | 
				
			||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ auto r=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
 | 
					inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ auto r=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
 | 
				
			||||||
 | 
					inline void acceleratorCopyToDeviceAsync(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { auto r = hipMemcpyAsync(to,from,bytes, hipMemcpyHostToDevice, stream);}
 | 
				
			||||||
 | 
					inline void acceleratorCopyFromDeviceAsync(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { auto r = hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToHost, stream);}
 | 
				
			||||||
//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes)  { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
 | 
					//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes)  { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
 | 
				
			||||||
//inline void acceleratorCopySynchronise(void) {  }
 | 
					//inline void acceleratorCopySynchronise(void) {  }
 | 
				
			||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto r=hipMemset(base,value,bytes);}
 | 
					inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto r=hipMemset(base,value,bytes);}
 | 
				
			||||||
@@ -478,155 +475,14 @@ inline void acceleratorCopySynchronise(void) { auto r=hipStreamSynchronize(copyS
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////
 | 
					//////////////////////////////////////////////
 | 
				
			||||||
// OpenMP Target acceleration
 | 
					// CPU Target - No accelerator just thread instead
 | 
				
			||||||
//////////////////////////////////////////////
 | 
					//////////////////////////////////////////////
 | 
				
			||||||
#ifdef GRID_OMPTARGET
 | 
					 | 
				
			||||||
//TODO GRID_SIMT for OMPTARGET
 | 
					 | 
				
			||||||
#define GRID_ACCELERATED
 | 
					 | 
				
			||||||
#include<omp.h>
 | 
					 | 
				
			||||||
#ifdef __CUDA_ARCH__
 | 
					 | 
				
			||||||
#include <cuda_runtime_api.h>
 | 
					 | 
				
			||||||
#elif defined __HIP_DEVICE_COMPILE__
 | 
					 | 
				
			||||||
#include <hip/hip_runtime.h>
 | 
					 | 
				
			||||||
#elif defined __SYCL_DEVICE_ONLY__
 | 
					 | 
				
			||||||
#include <CL/sycl.hpp>
 | 
					 | 
				
			||||||
#include <CL/sycl/usm.hpp>
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
extern "C" void *llvm_omp_target_alloc_host  (size_t Size, int DeviceNum);
 | 
					 | 
				
			||||||
extern "C" void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
 | 
					 | 
				
			||||||
extern "C" void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);
 | 
					 | 
				
			||||||
//TODO: Dynamic Shared Memory
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define THREAD_LIMIT acceleratorThreads()
 | 
					#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define accelerator
 | 
					#undef GRID_SIMT
 | 
				
			||||||
#define accelerator_inline strong_inline
 | 
					 | 
				
			||||||
#ifdef THREAD_LIMIT
 | 
					 | 
				
			||||||
#define accelerator_for(i,num,nsimd, ... ) \
 | 
					 | 
				
			||||||
	_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT)") \
 | 
					 | 
				
			||||||
	for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ; 
 | 
					 | 
				
			||||||
#define accelerator_forNB(i,num,nsimd, ... ) \
 | 
					 | 
				
			||||||
	_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT) nowait") \
 | 
					 | 
				
			||||||
        for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
 | 
					 | 
				
			||||||
#define accelerator_barrier(dummy) _Pragma("omp barrier") 
 | 
					 | 
				
			||||||
#define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) \
 | 
					 | 
				
			||||||
	_Pragma("omp target teams distribute parallel for thread_limit(THREAD_LIMIT) collapse(2)") \
 | 
					 | 
				
			||||||
        for ( uint64_t iter1=0;iter1<num1;iter1++) \
 | 
					 | 
				
			||||||
	for ( uint64_t iter2=0;iter2<num2;iter2++) { __VA_ARGS__ } ;
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
#define accelerator_for(i,num,nsimd, ... ) \
 | 
					 | 
				
			||||||
        _Pragma("omp target teams distribute parallel for") \
 | 
					 | 
				
			||||||
        for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
 | 
					 | 
				
			||||||
#define accelerator_forNB(i,num,nsimd, ... ) \
 | 
					 | 
				
			||||||
        _Pragma("omp target teams distribute parallel for nowait") \
 | 
					 | 
				
			||||||
        for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
 | 
					 | 
				
			||||||
#define accelerator_barrier(dummy) _Pragma("omp barrier")
 | 
					 | 
				
			||||||
#define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) \
 | 
					 | 
				
			||||||
        _Pragma("omp target teams distribute parallel for collapse(2)") \
 | 
					 | 
				
			||||||
        for ( uint64_t iter1=0;iter1<num1;iter1++) \
 | 
					 | 
				
			||||||
        for ( uint64_t iter2=0;iter2<num2;iter2++) { __VA_ARGS__ } ;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific
 | 
					 | 
				
			||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  int devc = omp_get_default_device();
 | 
					 | 
				
			||||||
  int host = omp_get_initial_device();
 | 
					 | 
				
			||||||
  if( omp_target_memcpy( to, from, bytes, 0, 0, devc, host ) ) {
 | 
					 | 
				
			||||||
    printf(" omp_target_memcpy host to device failed for %ld in device %d \n",bytes,devc);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  int devc = omp_get_default_device();
 | 
					 | 
				
			||||||
  int host = omp_get_initial_device();
 | 
					 | 
				
			||||||
  if( omp_target_memcpy( to, from, bytes, 0, 0, host, devc ) ) {
 | 
					 | 
				
			||||||
    printf(" omp_target_memcpy device to host failed for %ld in device %d \n",bytes,devc);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) 
 | 
					 | 
				
			||||||
{ 
 | 
					 | 
				
			||||||
#ifdef __CUDA_ARCH__
 | 
					 | 
				
			||||||
  extern cudaStream_t copyStream;
 | 
					 | 
				
			||||||
  cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
 | 
					 | 
				
			||||||
#elif defined __HIP_DEVICE_COMPILE__
 | 
					 | 
				
			||||||
  extern hipStream_t copyStream;
 | 
					 | 
				
			||||||
  hipMemcpyDtoDAsync(to,from,bytes, copyStream);
 | 
					 | 
				
			||||||
#elif defined __SYCL_DEVICE_ONLY__
 | 
					 | 
				
			||||||
  theCopyAccelerator->memcpy(to,from,bytes);
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
inline void acceleratorCopySynchronise(void) 
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  //#pragma omp barrier
 | 
					 | 
				
			||||||
#ifdef __CUDA_ARCH__
 | 
					 | 
				
			||||||
  extern cudaStream_t copyStream;
 | 
					 | 
				
			||||||
  cudaStreamSynchronize(copyStream);
 | 
					 | 
				
			||||||
#elif defined __HIP_DEVICE_COMPILE__
 | 
					 | 
				
			||||||
  extern hipStream_t copyStream;
 | 
					 | 
				
			||||||
  hipStreamSynchronize(copyStream);
 | 
					 | 
				
			||||||
#elif defined __SYCL_DEVICE_ONLY__
 | 
					 | 
				
			||||||
  theCopyAccelerator->wait();
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
inline int  acceleratorIsCommunicable(void *ptr){ return 1; }
 | 
					 | 
				
			||||||
inline void acceleratorMemSet(void *base,int value,size_t bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  void *base_host = memalign(GRID_ALLOC_ALIGN,bytes);
 | 
					 | 
				
			||||||
  memset(base_host,value,bytes);
 | 
					 | 
				
			||||||
  int devc = omp_get_default_device();
 | 
					 | 
				
			||||||
  int host = omp_get_initial_device();
 | 
					 | 
				
			||||||
  if( omp_target_memcpy( base, base_host, bytes, 0, 0, devc, host ) ) {
 | 
					 | 
				
			||||||
    printf(" omp_target_memcpy device to host failed in MemSet for %ld in device %d \n",bytes,devc);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
inline void *acceleratorAllocShared(size_t bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
#ifdef __CUDA_ARCH__
 | 
					 | 
				
			||||||
  void *ptr=NULL;
 | 
					 | 
				
			||||||
  auto err = cudaMallocManaged((void **)&ptr,bytes);
 | 
					 | 
				
			||||||
  if( err != cudaSuccess ) {
 | 
					 | 
				
			||||||
    ptr = (void *) NULL;
 | 
					 | 
				
			||||||
    printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return ptr;
 | 
					 | 
				
			||||||
#elif defined __HIP_DEVICE_COMPILE__
 | 
					 | 
				
			||||||
  void *ptr=NULL;
 | 
					 | 
				
			||||||
  auto err = hipMallocManaged((void **)&ptr,bytes);
 | 
					 | 
				
			||||||
  if( err != hipSuccess ) {
 | 
					 | 
				
			||||||
    ptr = (void *) NULL;
 | 
					 | 
				
			||||||
    printf(" hipMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return ptr;
 | 
					 | 
				
			||||||
#elif defined __SYCL_DEVICE_ONLY__
 | 
					 | 
				
			||||||
  queue q;
 | 
					 | 
				
			||||||
  //void *ptr = malloc_shared<void *>(bytes, q);
 | 
					 | 
				
			||||||
  return ptr;
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
  int devc = omp_get_default_device();
 | 
					 | 
				
			||||||
  void *ptr=NULL;
 | 
					 | 
				
			||||||
  ptr = (void *) llvm_omp_target_alloc_shared(bytes, devc);
 | 
					 | 
				
			||||||
  if( ptr == NULL ) {
 | 
					 | 
				
			||||||
    printf(" llvm_omp_target_alloc_shared failed for %ld in device %d \n",bytes,devc);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return ptr;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
inline void *acceleratorAllocDevice(size_t bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  int devc = omp_get_default_device();
 | 
					 | 
				
			||||||
  void *ptr=NULL;
 | 
					 | 
				
			||||||
  ptr = (void *) omp_target_alloc(bytes, devc);
 | 
					 | 
				
			||||||
  if( ptr == NULL ) {
 | 
					 | 
				
			||||||
    printf(" omp_target_alloc failed for %ld in device %d \n",bytes,devc);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return ptr;
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
inline void acceleratorFreeShared(void *ptr){omp_target_free(ptr, omp_get_default_device());};
 | 
					 | 
				
			||||||
inline void acceleratorFreeDevice(void *ptr){omp_target_free(ptr, omp_get_default_device());};
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
//OpenMP CPU threads
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define accelerator 
 | 
					#define accelerator 
 | 
				
			||||||
#define accelerator_inline strong_inline
 | 
					#define accelerator_inline strong_inline
 | 
				
			||||||
@@ -655,14 +511,7 @@ inline void *acceleratorAllocDevice(size_t bytes){return memalign(GRID_ALLOC_ALI
 | 
				
			|||||||
inline void acceleratorFreeShared(void *ptr){free(ptr);};
 | 
					inline void acceleratorFreeShared(void *ptr){free(ptr);};
 | 
				
			||||||
inline void acceleratorFreeDevice(void *ptr){free(ptr);};
 | 
					inline void acceleratorFreeDevice(void *ptr){free(ptr);};
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////
 | 
					 | 
				
			||||||
// CPU Target - No accelerator just thread instead
 | 
					 | 
				
			||||||
//////////////////////////////////////////////
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) ) && (!defined(GRID_OMPTARGET))
 | 
					 | 
				
			||||||
#undef GRID_SIMT
 | 
					 | 
				
			||||||
#endif // CPU target
 | 
					#endif // CPU target
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef HAVE_MM_MALLOC_H
 | 
					#ifdef HAVE_MM_MALLOC_H
 | 
				
			||||||
@@ -735,5 +584,3 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -46,7 +46,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef GRID_OMP
 | 
					#ifdef GRID_OMP
 | 
				
			||||||
#define DO_PRAGMA_(x) _Pragma ("x")
 | 
					#define DO_PRAGMA_(x) _Pragma (#x)
 | 
				
			||||||
#define DO_PRAGMA(x) DO_PRAGMA_(x)
 | 
					#define DO_PRAGMA(x) DO_PRAGMA_(x)
 | 
				
			||||||
#define thread_num(a) omp_get_thread_num()
 | 
					#define thread_num(a) omp_get_thread_num()
 | 
				
			||||||
#define thread_max(a) omp_get_max_threads()
 | 
					#define thread_max(a) omp_get_max_threads()
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										339
									
								
								Grid/util/FlightRecorder.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										339
									
								
								Grid/util/FlightRecorder.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,339 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/Init.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <peterboyle@MacBook-Pro.local>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Grid Norm logging for repro testing
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					int FlightRecorder::PrintEntireLog;
 | 
				
			||||||
 | 
					int FlightRecorder::ContinueOnFail;
 | 
				
			||||||
 | 
					int FlightRecorder::LoggingMode;
 | 
				
			||||||
 | 
					int FlightRecorder::ChecksumComms;
 | 
				
			||||||
 | 
					int FlightRecorder::ChecksumCommsSend;
 | 
				
			||||||
 | 
					int32_t  FlightRecorder::XmitLoggingCounter;
 | 
				
			||||||
 | 
					int32_t  FlightRecorder::RecvLoggingCounter;
 | 
				
			||||||
 | 
					int32_t  FlightRecorder::CsumLoggingCounter;
 | 
				
			||||||
 | 
					int32_t  FlightRecorder::NormLoggingCounter;
 | 
				
			||||||
 | 
					int32_t  FlightRecorder::ReductionLoggingCounter;
 | 
				
			||||||
 | 
					uint64_t FlightRecorder::ErrorCounter;
 | 
				
			||||||
 | 
					std::vector<double> FlightRecorder::NormLogVector;
 | 
				
			||||||
 | 
					std::vector<double> FlightRecorder::ReductionLogVector;
 | 
				
			||||||
 | 
					std::vector<uint64_t> FlightRecorder::CsumLogVector;
 | 
				
			||||||
 | 
					std::vector<uint64_t> FlightRecorder::XmitLogVector;
 | 
				
			||||||
 | 
					std::vector<uint64_t> FlightRecorder::RecvLogVector;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void FlightRecorder::ResetCounters(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  XmitLoggingCounter=0;
 | 
				
			||||||
 | 
					  RecvLoggingCounter=0;
 | 
				
			||||||
 | 
					  CsumLoggingCounter=0;
 | 
				
			||||||
 | 
					  NormLoggingCounter=0;
 | 
				
			||||||
 | 
					  ReductionLoggingCounter=0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::Truncate(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  ResetCounters();
 | 
				
			||||||
 | 
					  XmitLogVector.resize(0);
 | 
				
			||||||
 | 
					  RecvLogVector.resize(0);
 | 
				
			||||||
 | 
					  NormLogVector.resize(0);
 | 
				
			||||||
 | 
					  CsumLogVector.resize(0);
 | 
				
			||||||
 | 
					  ReductionLogVector.resize(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::SetLoggingMode(FlightRecorder::LoggingMode_t mode)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  switch ( mode ) {
 | 
				
			||||||
 | 
					  case LoggingModePrint:
 | 
				
			||||||
 | 
					    SetLoggingModePrint();
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  case LoggingModeRecord:
 | 
				
			||||||
 | 
					    SetLoggingModeRecord();
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  case LoggingModeVerify:
 | 
				
			||||||
 | 
					    SetLoggingModeVerify();
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  case LoggingModeNone:
 | 
				
			||||||
 | 
					    LoggingMode = mode;
 | 
				
			||||||
 | 
					    Truncate();
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  default:
 | 
				
			||||||
 | 
					    assert(0);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void FlightRecorder::SetLoggingModePrint(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::cout << " FlightRecorder: set to print output " <<std::endl;
 | 
				
			||||||
 | 
					  Truncate();
 | 
				
			||||||
 | 
					  LoggingMode = LoggingModePrint;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::SetLoggingModeRecord(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::cout << " FlightRecorder: set to RECORD " <<std::endl;
 | 
				
			||||||
 | 
					  Truncate();
 | 
				
			||||||
 | 
					  LoggingMode = LoggingModeRecord;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::SetLoggingModeVerify(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::cout << " FlightRecorder: set to VERIFY " << NormLogVector.size()<< " log entries "<<std::endl;
 | 
				
			||||||
 | 
					  ResetCounters();
 | 
				
			||||||
 | 
					  LoggingMode = LoggingModeVerify;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					uint64_t FlightRecorder::ErrorCount(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  return ErrorCounter;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::NormLog(double value)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  uint64_t hex = * ( (uint64_t *)&value );
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModePrint) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::NormLog : "<< NormLoggingCounter <<" "<<std::hex<< hex<<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    NormLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeRecord) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::NormLog RECORDING : "<< NormLoggingCounter <<" "<<std::hex<< hex<<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    NormLogVector.push_back(value);
 | 
				
			||||||
 | 
					    NormLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeVerify) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(NormLoggingCounter < NormLogVector.size()){
 | 
				
			||||||
 | 
					      uint64_t hexref  = * ( (uint64_t *)&NormLogVector[NormLoggingCounter] );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if ( (value != NormLogVector[NormLoggingCounter]) || std::isnan(value) ) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						std::cerr<<"FlightRecorder::NormLog Oops, I did it again "<< NormLoggingCounter
 | 
				
			||||||
 | 
							 <<std::hex<<" "<<hex<<" "<<hexref<<std::dec<<" "
 | 
				
			||||||
 | 
							 <<std::hexfloat<<value<<" "<< NormLogVector[NormLoggingCounter]<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						std::cerr << " Oops got norm "<< std::hexfloat<<value<<" expect "<<NormLogVector[NormLoggingCounter] <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fprintf(stderr,"%s:%d Oops, I did it again! Reproduce failure for norm %d/%zu %.16e expect %.16e\n",
 | 
				
			||||||
 | 
							GridHostname(),
 | 
				
			||||||
 | 
							GlobalSharedMemory::WorldShmRank,
 | 
				
			||||||
 | 
							NormLoggingCounter,NormLogVector.size(),
 | 
				
			||||||
 | 
							value, NormLogVector[NormLoggingCounter]); fflush(stderr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if(!ContinueOnFail)assert(0); // Force takedown of job
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						ErrorCounter++;
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
						if ( PrintEntireLog ) { 
 | 
				
			||||||
 | 
						  std::cerr<<"FlightRecorder::NormLog VALID "<< NormLoggingCounter << std::hex
 | 
				
			||||||
 | 
							   <<" "<<hex<<" "<<hexref
 | 
				
			||||||
 | 
							   <<" "<<std::hexfloat<<value<<" "<< NormLogVector[NormLoggingCounter]<<std::dec<<std::endl;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					       
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if ( NormLogVector.size()==NormLoggingCounter ) {
 | 
				
			||||||
 | 
					      std::cout << "FlightRecorder:: Verified entire sequence of "<<NormLoggingCounter<<" norms "<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    NormLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::CsumLog(uint64_t hex)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModePrint) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::CsumLog : "<< CsumLoggingCounter <<" "<<std::hex<< hex<<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    CsumLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeRecord) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::CsumLog RECORDING : "<< NormLoggingCounter <<" "<<std::hex<< hex<<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    CsumLogVector.push_back(hex);
 | 
				
			||||||
 | 
					    CsumLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeVerify) {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    if(CsumLoggingCounter < CsumLogVector.size()) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      uint64_t hexref  = CsumLogVector[CsumLoggingCounter] ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if ( hex != hexref ) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        std::cerr<<"FlightRecorder::CsumLog Oops, I did it again "<< CsumLoggingCounter
 | 
				
			||||||
 | 
							 <<std::hex<<" "<<hex<<" "<<hexref<<std::dec<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fprintf(stderr,"%s:%d Oops, I did it again! Reproduce failure for csum %d %lx expect %lx\n",
 | 
				
			||||||
 | 
							GridHostname(),
 | 
				
			||||||
 | 
							GlobalSharedMemory::WorldShmRank,
 | 
				
			||||||
 | 
							CsumLoggingCounter,hex, hexref);
 | 
				
			||||||
 | 
						fflush(stderr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if(!ContinueOnFail) assert(0); // Force takedown of job
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						ErrorCounter++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if ( PrintEntireLog ) { 
 | 
				
			||||||
 | 
						  std::cerr<<"FlightRecorder::CsumLog VALID "<< CsumLoggingCounter << std::hex
 | 
				
			||||||
 | 
							   <<" "<<hex<<" "<<hexref<<std::dec<<std::endl;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }  
 | 
				
			||||||
 | 
					    if ( CsumLogVector.size()==CsumLoggingCounter ) {
 | 
				
			||||||
 | 
					      std::cout << "FlightRecorder:: Verified entire sequence of "<<CsumLoggingCounter<<" checksums "<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    CsumLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::ReductionLog(double local,double global)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  uint64_t hex_l = * ( (uint64_t *)&local );
 | 
				
			||||||
 | 
					  uint64_t hex_g = * ( (uint64_t *)&global );
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModePrint) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::ReductionLog : "<< ReductionLoggingCounter <<" "<< std::hex << hex_l << " -> " <<hex_g<<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    ReductionLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeRecord) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::ReductionLog RECORDING : "<< ReductionLoggingCounter <<" "<< std::hex << hex_l << " -> " <<hex_g<<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    ReductionLogVector.push_back(global);
 | 
				
			||||||
 | 
					    ReductionLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeVerify) {
 | 
				
			||||||
 | 
					    if(ReductionLoggingCounter < ReductionLogVector.size()){
 | 
				
			||||||
 | 
					      if ( global != ReductionLogVector[ReductionLoggingCounter] ) {
 | 
				
			||||||
 | 
						fprintf(stderr,"%s:%d Oops, MPI_Allreduce did it again! Reproduce failure for norm %d/%zu glb %.16e lcl %.16e expect glb %.16e\n",
 | 
				
			||||||
 | 
							GridHostname(),
 | 
				
			||||||
 | 
							GlobalSharedMemory::WorldShmRank,
 | 
				
			||||||
 | 
							ReductionLoggingCounter,ReductionLogVector.size(),
 | 
				
			||||||
 | 
							global, local, ReductionLogVector[ReductionLoggingCounter]); fflush(stderr);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						if ( !ContinueOnFail ) assert(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ErrorCounter++;
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
						if ( PrintEntireLog ) { 
 | 
				
			||||||
 | 
						  std::cerr<<"FlightRecorder::ReductionLog : VALID "<< ReductionLoggingCounter <<" "<< std::hexfloat << local << "-> "<< global <<std::endl;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if ( ReductionLogVector.size()==ReductionLoggingCounter ) {
 | 
				
			||||||
 | 
					      std::cout << "FlightRecorder::ReductionLog : Verified entire sequence of "<<ReductionLoggingCounter<<" norms "<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ReductionLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::xmitLog(void *buf,uint64_t bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  if ( ChecksumCommsSend ){
 | 
				
			||||||
 | 
					  uint64_t *ubuf = (uint64_t *)buf;
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeNone) return;
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					  uint64_t _xor = svm_xor(ubuf,bytes/sizeof(uint64_t));
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModePrint) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::xmitLog : "<< XmitLoggingCounter <<" "<< std::hex << _xor <<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    XmitLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeRecord) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::xmitLog RECORD : "<< XmitLoggingCounter <<" "<< std::hex << _xor <<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    XmitLogVector.push_back(_xor);
 | 
				
			||||||
 | 
					    XmitLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeVerify) {
 | 
				
			||||||
 | 
					    if(XmitLoggingCounter < XmitLogVector.size()){
 | 
				
			||||||
 | 
					      if ( _xor != XmitLogVector[XmitLoggingCounter] ) {
 | 
				
			||||||
 | 
						fprintf(stderr,"%s:%d Oops, send buf difference! Reproduce failure for xmit %d/%zu  %lx expect glb %lx\n",
 | 
				
			||||||
 | 
							GridHostname(),
 | 
				
			||||||
 | 
							GlobalSharedMemory::WorldShmRank,
 | 
				
			||||||
 | 
							XmitLoggingCounter,XmitLogVector.size(),
 | 
				
			||||||
 | 
							_xor, XmitLogVector[XmitLoggingCounter]); fflush(stderr);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						if ( !ContinueOnFail ) assert(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ErrorCounter++;
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
						if ( PrintEntireLog ) { 
 | 
				
			||||||
 | 
						  std::cerr<<"FlightRecorder::XmitLog : VALID "<< XmitLoggingCounter <<" "<< std::hexfloat << _xor << " "<<  XmitLogVector[XmitLoggingCounter] <<std::endl;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if ( XmitLogVector.size()==XmitLoggingCounter ) {
 | 
				
			||||||
 | 
					      std::cout << "FlightRecorder::ReductionLog : Verified entire sequence of "<<XmitLoggingCounter<<" sends "<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    XmitLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    uint64_t word = 1;
 | 
				
			||||||
 | 
					    deviceVector<uint64_t> dev(1);
 | 
				
			||||||
 | 
					    acceleratorCopyToDevice(&word,&dev[0],sizeof(uint64_t));
 | 
				
			||||||
 | 
					    acceleratorCopySynchronise();
 | 
				
			||||||
 | 
					    MPI_Barrier(MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void FlightRecorder::recvLog(void *buf,uint64_t bytes,int rank)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  if ( ChecksumComms ){
 | 
				
			||||||
 | 
					  uint64_t *ubuf = (uint64_t *)buf;
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeNone) return;
 | 
				
			||||||
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
 | 
					  uint64_t _xor = svm_xor(ubuf,bytes/sizeof(uint64_t));
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModePrint) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::recvLog : "<< RecvLoggingCounter <<" "<< std::hex << _xor <<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    RecvLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeRecord) {
 | 
				
			||||||
 | 
					    std::cerr<<"FlightRecorder::recvLog RECORD : "<< RecvLoggingCounter <<" "<< std::hex << _xor <<std::dec <<std::endl;
 | 
				
			||||||
 | 
					    RecvLogVector.push_back(_xor);
 | 
				
			||||||
 | 
					    RecvLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if(LoggingMode == LoggingModeVerify) {
 | 
				
			||||||
 | 
					    if(RecvLoggingCounter < RecvLogVector.size()){
 | 
				
			||||||
 | 
					      if ( _xor != RecvLogVector[RecvLoggingCounter] ) {
 | 
				
			||||||
 | 
						fprintf(stderr,"%s:%d Oops, recv buf difference! Reproduce failure for recv %d/%zu  %lx expect glb %lx from MPI rank %d\n",
 | 
				
			||||||
 | 
							GridHostname(),
 | 
				
			||||||
 | 
							GlobalSharedMemory::WorldShmRank,
 | 
				
			||||||
 | 
							RecvLoggingCounter,RecvLogVector.size(),
 | 
				
			||||||
 | 
							_xor, RecvLogVector[RecvLoggingCounter],rank); fflush(stderr);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						if ( !ContinueOnFail ) assert(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ErrorCounter++;
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
						if ( PrintEntireLog ) { 
 | 
				
			||||||
 | 
						  std::cerr<<"FlightRecorder::RecvLog : VALID "<< RecvLoggingCounter <<" "<< std::hexfloat << _xor << " "<<  RecvLogVector[RecvLoggingCounter] <<std::endl;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if ( RecvLogVector.size()==RecvLoggingCounter ) {
 | 
				
			||||||
 | 
					      std::cout << "FlightRecorder::ReductionLog : Verified entire sequence of "<<RecvLoggingCounter<<" sends "<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    RecvLoggingCounter++;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
							
								
								
									
										43
									
								
								Grid/util/FlightRecorder.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								Grid/util/FlightRecorder.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,43 @@
 | 
				
			|||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					// Static-only helper that prints, records or verifies sequences of norms,
					// checksums, reductions and communication-buffer checksums.
					// In Record mode values are appended to the *LogVector members; in Verify mode
					// each new value is compared against the stored entry at the matching counter.
					class FlightRecorder {
					 public:
					  // What the logging entry points do on each call.
					  enum LoggingMode_t {
					    LoggingModeNone,
					    LoggingModePrint,
					    LoggingModeRecord,
					    LoggingModeVerify
					  };

					  // ---- Behaviour switches ----
					  static int LoggingMode;        // holds one of the LoggingMode_t values
					  static int ContinueOnFail;     // non-zero: a verify mismatch is counted instead of asserting
					  static int PrintEntireLog;     // non-zero: entries that verify correctly are printed too
					  static int ChecksumComms;      // non-zero: recvLog checksums receive buffers
					  static int ChecksumCommsSend;  // non-zero: xmitLog checksums send buffers

					  // ---- Running state ----
					  static uint64_t ErrorCounter;             // incremented on every verify mismatch
					  static int32_t  NormLoggingCounter;       // index of the next norm entry
					  static int32_t  CsumLoggingCounter;       // index of the next checksum entry
					  static int32_t  ReductionLoggingCounter;  // index of the next reduction entry
					  static int32_t  XmitLoggingCounter;       // index of the next send-buffer entry
					  static int32_t  RecvLoggingCounter;       // index of the next receive-buffer entry

					  // ---- Recorded reference sequences ----
					  static std::vector<double>   NormLogVector;
					  static std::vector<uint64_t> CsumLogVector;
					  static std::vector<double>   ReductionLogVector;
					  static std::vector<uint64_t> XmitLogVector;
					  static std::vector<uint64_t> RecvLogVector;

					  // ---- Mode selection and housekeeping (defined outside this header) ----
					  static void SetLoggingMode(LoggingMode_t mode);
					  static void SetLoggingModePrint();
					  static void SetLoggingModeRecord();
					  static void SetLoggingModeVerify();
					  static void Truncate();
					  static void ResetCounters();
					  static uint64_t ErrorCount();

					  // ---- Logging entry points ----
					  static void NormLog(double value);
					  static void CsumLog(uint64_t csum);
					  static void ReductionLog(double lcl, double glbl);
					  static void xmitLog(void *buf,uint64_t bytes);
					  static void recvLog(void *buf,uint64_t bytes,int rank);
					};
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -77,6 +77,10 @@ feenableexcept (unsigned int excepts)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef HOST_NAME_MAX
 | 
				
			||||||
 | 
					#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////
 | 
				
			||||||
@@ -90,7 +94,12 @@ int GridThread::_threads =1;
 | 
				
			|||||||
int GridThread::_hyperthreads=1;
 | 
					int GridThread::_hyperthreads=1;
 | 
				
			||||||
int GridThread::_cores=1;
 | 
					int GridThread::_cores=1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Host name of the node this rank runs on. Zero-initialised, so it reads as an
					// empty string until Grid_init fills it in with gethostname().
					char hostname[HOST_NAME_MAX+1];

					// Accessor for the host-name buffer; the returned pointer refers to the
					// global array itself, not to a copy.
					char *GridHostname(void)
					{
					  return &hostname[0];
					}
 | 
				
			||||||
const Coordinate &GridDefaultLatt(void)     {return Grid_default_latt;};
 | 
					const Coordinate &GridDefaultLatt(void)     {return Grid_default_latt;};
 | 
				
			||||||
const Coordinate &GridDefaultMpi(void)      {return Grid_default_mpi;};
 | 
					const Coordinate &GridDefaultMpi(void)      {return Grid_default_mpi;};
 | 
				
			||||||
const Coordinate GridDefaultSimd(int dims,int nsimd)
 | 
					const Coordinate GridDefaultSimd(int dims,int nsimd)
 | 
				
			||||||
@@ -393,6 +402,8 @@ void Grid_init(int *argc,char ***argv)
 | 
				
			|||||||
  std::cout << GridLogMessage << "MPI is initialised and logging filters activated "<<std::endl;
 | 
					  std::cout << GridLogMessage << "MPI is initialised and logging filters activated "<<std::endl;
 | 
				
			||||||
  std::cout << GridLogMessage << "================================================ "<<std::endl;
 | 
					  std::cout << GridLogMessage << "================================================ "<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  gethostname(hostname, HOST_NAME_MAX+1);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "This rank is running on host "<< hostname<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /////////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////////
 | 
				
			||||||
  // Reporting
 | 
					  // Reporting
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,6 +34,8 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
void Grid_init(int *argc,char ***argv);
 | 
					void Grid_init(int *argc,char ***argv);
 | 
				
			||||||
void Grid_finalize(void);
 | 
					void Grid_finalize(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					char * GridHostname(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// internal, controlled with --handle
 | 
					// internal, controlled with --handle
 | 
				
			||||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
 | 
					void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
 | 
				
			||||||
void Grid_debug_handler_init(void);
 | 
					void Grid_debug_handler_init(void);
 | 
				
			||||||
@@ -68,5 +70,6 @@ void GridParseLayout(char **argv,int argc,
 | 
				
			|||||||
void printHash(void);
 | 
					void printHash(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,6 +1,6 @@
 | 
				
			|||||||
#ifndef GRID_UTIL_H
 | 
					#pragma once
 | 
				
			||||||
#define GRID_UTIL_H
 | 
					 | 
				
			||||||
#include <Grid/util/Coordinate.h>
 | 
					#include <Grid/util/Coordinate.h>
 | 
				
			||||||
#include <Grid/util/Lexicographic.h>
 | 
					#include <Grid/util/Lexicographic.h>
 | 
				
			||||||
#include <Grid/util/Init.h>
 | 
					#include <Grid/util/Init.h>
 | 
				
			||||||
#endif
 | 
					#include <Grid/util/FlightRecorder.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										25
									
								
								HOWTO
									
									
									
									
									
								
							
							
						
						
									
										25
									
								
								HOWTO
									
									
									
									
									
								
							@@ -1,25 +0,0 @@
 | 
				
			|||||||
1. on Cori GPU, load necessary modules
 | 
					 | 
				
			||||||
source ./load_cgpu_modules.sh 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
2. run bootstrap script
 | 
					 | 
				
			||||||
./bootstrap.sh
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
3. Create a build directory, for example, 
 | 
					 | 
				
			||||||
mkdir build-cgpu
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
3. run configure script in the build directory 
 | 
					 | 
				
			||||||
cd build-cgpu
 | 
					 | 
				
			||||||
sh config-command
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Example config-command for single-GPU omp offload: 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
../configure \
 | 
					 | 
				
			||||||
    --enable-comms=none \
 | 
					 | 
				
			||||||
    --enable-simd=GEN \
 | 
					 | 
				
			||||||
    --enable-gen-simd-width=16 \
 | 
					 | 
				
			||||||
    CXX=clang++ \
 | 
					 | 
				
			||||||
    LDFLAGS="-L${CUDA_ROOT}/lib64 -lcudart" \
 | 
					 | 
				
			||||||
    CXXFLAGS="-Wno-unknown-cuda-version -I${CUDA_ROOT}/include -fopenmp -std=c++14 -fopenmp-cuda-mode  -O3 -g -fopenmp-targets=nvptx64-nvidia-cuda -Wformat -DOMPTARGET -DOMPTARGET_MANAGED"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
4. compile 
 | 
					 | 
				
			||||||
make -j8
 | 
					 | 
				
			||||||
@@ -1,27 +0,0 @@
 | 
				
			|||||||
module load rocm/5.5.1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
mkdir build-amd-err && cd build-amd-err
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cp ../amd-omp-stack-err/Test.cc ../amd-omp-stack-err/WilsonFermionInstantiationWilsonImplD.cc .
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
../configure CXX=amdclang++ --enable-comms=none --enable-simd=GEN --enable-accelerator-cshift=no --enable-shm=no --disable-unified --enable-unified=no --enable-fermion-reps=no --enable-gen-simd-width=16 CXXFLAGS="-Wno-unknown-cuda-version -fopenmp --offload-arch=gfx90a -std=c++14 -fopenmp-cuda-mode -O3 -g -Wformat -DEIGEN_NO_CUDA -DEIGEN_DONT_VECTORIZE -DOMPTARGET"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
amdclang++ -c Test.cc -o Test.o -I/autofs/nccs-svm1_home1/atif/Grid -I/autofs/nccs-svm1_home1/atif/Grid/build-amd-err/Grid/ -O3 -Wno-unknown-cuda-version -fopenmp --offload-arch=gfx90a -std=c++14 -fopenmp-cuda-mode -O3 -Wformat -DEIGEN_NO_CUDA -DOMPTARGET -fno-strict-aliasing
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
amdclang++ -c WilsonFermionInstantiationWilsonImplD.cc -o WilsonFails.o -I/autofs/nccs-svm1_home1/atif/Grid -I/autofs/nccs-svm1_home1/atif/Grid/build-amd-err/Grid/ -O3 -Wno-unknown-cuda-version -fopenmp --offload-arch=gfx90a -std=c++14 -fopenmp-cuda-mode -O3 -Wformat -DEIGEN_NO_CUDA -DOMPTARGET -fno-strict-aliasing
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ar cru libWilsonFails.a WilsonFails.o
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ranlib libWilsonFails.a
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
amdclang++ -o Test -I/autofs/nccs-svm1_home1/atif/Grid -I/autofs/nccs-svm1_home1/atif/Grid/build-amd-err/Grid/   -O3 -Wno-unknown-cuda-version -fopenmp --offload-arch=gfx90a -std=c++14 -fopenmp-cuda-mode -O3 -Wformat -DEIGEN_NO_CUDA -DOMPTARGET  -fno-strict-aliasing Test.o -L./  -lWilsonFails
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
error: stack frame size (149840) exceeds limit (131056) in function '__omp_offloading_72_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_12TrinaryWhereENS0_INS1_INS3_IjNS7_IjEEEEEEEESD_SD_EERSD_RKNS_24LatticeTrinaryExpressionIT_T0_T1_T2_EE_l190'
 | 
					 | 
				
			||||||
error: stack frame size (149840) exceeds limit (131056) in function '__omp_offloading_72_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_12TrinaryWhereENS_23LatticeBinaryExpressionINS_10BinaryOrOrENS0_INS1_INS3_IjNS7_IjEEEEEEEESL_EESD_SD_EERSD_RKNS_24LatticeTrinaryExpressionIT_T0_T1_T2_EE_l190'
 | 
					 | 
				
			||||||
error: stack frame size (149840) exceeds limit (131056) in function '__omp_offloading_72_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryAddESD_NS_24LatticeTrinaryExpressionINS_12TrinaryWhereENS0_INS1_INS3_IjNS7_IjEEEEEEEESD_SD_EEEERSD_RKNS_23LatticeBinaryExpressionIT_T0_T1_EE_l166'
 | 
					 | 
				
			||||||
clang-16: error: amdgcn-link command failed with exit code 1 (use -v to see invocation)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
llvm-objdump -t libWilsonFermionWorks2.a > objdump_works2.txt
 | 
					 | 
				
			||||||
llvm-cxxfilt < objdump_works2.txt > cxxfilt_works2.txt
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@@ -1,615 +0,0 @@
 | 
				
			|||||||
/*************************************************************************************
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Grid physics library, www.github.com/paboyle/Grid
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Copyright (C) 2022
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
					 | 
				
			||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This program is free software; you can redistribute it and/or modify
 | 
					 | 
				
			||||||
it under the terms of the GNU General Public License as published by
 | 
					 | 
				
			||||||
the Free Software Foundation; either version 2 of the License, or
 | 
					 | 
				
			||||||
(at your option) any later version.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This program is distributed in the hope that it will be useful,
 | 
					 | 
				
			||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					 | 
				
			||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					 | 
				
			||||||
GNU General Public License for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You should have received a copy of the GNU General Public License along
 | 
					 | 
				
			||||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
					 | 
				
			||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
See the full license in the file "LICENSE" in the top level distribution
 | 
					 | 
				
			||||||
directory
 | 
					 | 
				
			||||||
*************************************************************************************/
 | 
					 | 
				
			||||||
			   /*  END LEGAL */
 | 
					 | 
				
			||||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
					 | 
				
			||||||
#include <Grid/qcd/action/fermion/WilsonFermion.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/////////////////////////////////
 | 
					 | 
				
			||||||
// Constructor and gauge import
 | 
					 | 
				
			||||||
/////////////////////////////////
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
 | 
					 | 
				
			||||||
//                                   GridRedBlackCartesian &Hgrid, RealD _mass,
 | 
					 | 
				
			||||||
//                                   const ImplParams &p,
 | 
					 | 
				
			||||||
//                                   const WilsonAnisotropyCoefficients &anis)
 | 
					 | 
				
			||||||
//  :
 | 
					 | 
				
			||||||
//    Kernels(p),
 | 
					 | 
				
			||||||
//    _grid(&Fgrid),
 | 
					 | 
				
			||||||
//    _cbgrid(&Hgrid),
 | 
					 | 
				
			||||||
//    Stencil(&Fgrid, npoint, Even, directions, displacements,p),
 | 
					 | 
				
			||||||
//    StencilEven(&Hgrid, npoint, Even, directions,displacements,p),  // source is Even
 | 
					 | 
				
			||||||
//    StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p),  // source is Odd
 | 
					 | 
				
			||||||
//    mass(_mass),
 | 
					 | 
				
			||||||
//    Lebesgue(_grid),
 | 
					 | 
				
			||||||
//    LebesgueEvenOdd(_cbgrid),
 | 
					 | 
				
			||||||
//    Umu(&Fgrid),
 | 
					 | 
				
			||||||
//    UmuEven(&Hgrid),
 | 
					 | 
				
			||||||
//    UmuOdd(&Hgrid),
 | 
					 | 
				
			||||||
//      _tmp(&Hgrid),
 | 
					 | 
				
			||||||
//      anisotropyCoeff(anis)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Stencil.lo     = &Lebesgue;
 | 
					 | 
				
			||||||
//  StencilEven.lo = &LebesgueEvenOdd;
 | 
					 | 
				
			||||||
//  StencilOdd.lo  = &LebesgueEvenOdd;
 | 
					 | 
				
			||||||
//  // Allocate the required comms buffer
 | 
					 | 
				
			||||||
//  ImportGauge(_Umu);
 | 
					 | 
				
			||||||
//  if  (anisotropyCoeff.isAnisotropic){
 | 
					 | 
				
			||||||
//    diag_mass = mass + 1.0 + (Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    diag_mass = 4.0 + mass;
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int vol4;
 | 
					 | 
				
			||||||
//  vol4=Fgrid.oSites();
 | 
					 | 
				
			||||||
//  Stencil.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//  vol4=Hgrid.oSites();
 | 
					 | 
				
			||||||
//  StencilEven.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//  StencilOdd.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GaugeField HUmu(_Umu.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  //Here multiply the anisotropy coefficients
 | 
					 | 
				
			||||||
//  if (anisotropyCoeff.isAnisotropic)
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    for (int mu = 0; mu < Nd; mu++)
 | 
					 | 
				
			||||||
//    {
 | 
					 | 
				
			||||||
//      GaugeLinkField U_dir = (-0.5)*PeekIndex<LorentzIndex>(_Umu, mu);
 | 
					 | 
				
			||||||
//      if (mu != anisotropyCoeff.t_direction)
 | 
					 | 
				
			||||||
//        U_dir *= (anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      PokeIndex<LorentzIndex>(HUmu, U_dir, mu);
 | 
					 | 
				
			||||||
//    }
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  else
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    HUmu = _Umu * (-0.5);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  Impl::DoubleStore(GaugeGrid(), Umu, HUmu);
 | 
					 | 
				
			||||||
//  pickCheckerboard(Even, UmuEven, Umu);
 | 
					 | 
				
			||||||
//  pickCheckerboard(Odd, UmuOdd, Umu);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
///////////////////////////////
 | 
					 | 
				
			||||||
//// Implement the interface
 | 
					 | 
				
			||||||
///////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Dhop(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  axpy(out, diag_mass, in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Dhop(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  axpy(out, diag_mass, in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if (in.Checkerboard() == Odd) {
 | 
					 | 
				
			||||||
//    DhopEO(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    DhopOE(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if (in.Checkerboard() == Odd) {
 | 
					 | 
				
			||||||
//    DhopEO(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    DhopOE(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  typename FermionField::scalar_type scal(diag_mass);
 | 
					 | 
				
			||||||
//  out = scal * in;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Mooee(in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  out = (1.0/(diag_mass))*in;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  MooeeInv(in,out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m,std::vector<double> twist)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  typedef typename FermionField::vector_type vector_type;
 | 
					 | 
				
			||||||
//  typedef typename FermionField::scalar_type ScalComplex;
 | 
					 | 
				
			||||||
//  typedef Lattice<iSinglet<vector_type> > LatComplex;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  // what type LatticeComplex
 | 
					 | 
				
			||||||
//  conformable(_grid,out.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaT
 | 
					 | 
				
			||||||
//  };
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Coordinate latt_size   = _grid->_fdimensions;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  FermionField   num  (_grid); num  = Zero();
 | 
					 | 
				
			||||||
//  LatComplex    wilson(_grid); wilson= Zero();
 | 
					 | 
				
			||||||
//  LatComplex     one  (_grid); one = ScalComplex(1.0,0.0);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  LatComplex denom(_grid); denom= Zero();
 | 
					 | 
				
			||||||
//  LatComplex kmu(_grid);
 | 
					 | 
				
			||||||
//  ScalComplex ci(0.0,1.0);
 | 
					 | 
				
			||||||
//  // momphase = n * 2pi / L
 | 
					 | 
				
			||||||
//  for(int mu=0;mu<Nd;mu++) {
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    LatticeCoordinate(kmu,mu);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    kmu = TwoPiL * kmu;
 | 
					 | 
				
			||||||
//    kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    wilson = wilson + 2.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in);    // derivative term
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    denom=denom + sin(kmu)*sin(kmu);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  wilson = wilson + _m;     // 2 sin^2 k/2 + m
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  num   = num + wilson*in;     // -i gmu sin k + 2 sin^2 k/2 + m
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  denom= denom+wilson*wilson; // sin^2 k + (2 sin^2 k/2 + m)^2
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  denom= one/denom;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ]
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
/////////////////////////////////////
 | 
					 | 
				
			||||||
//// Internal
 | 
					 | 
				
			||||||
/////////////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                        GaugeField &mat, const FermionField &A,
 | 
					 | 
				
			||||||
//                                        const FermionField &B, int dag) {
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  FermionField Btilde(B.Grid());
 | 
					 | 
				
			||||||
//  FermionField Atilde(B.Grid());
 | 
					 | 
				
			||||||
//  Atilde = A;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  st.HaloExchange(B, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  for (int mu = 0; mu < Nd; mu++) {
 | 
					 | 
				
			||||||
//    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    // Flip gamma (1+g)<->(1-g) if dag
 | 
					 | 
				
			||||||
//    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    int gamma = mu;
 | 
					 | 
				
			||||||
//    if (!dag) gamma += Nd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    int Ls=1;
 | 
					 | 
				
			||||||
//    Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    //////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    // spin trace outer product
 | 
					 | 
				
			||||||
//    //////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    Impl::InsertForce4D(mat, Btilde, Atilde, mu);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _grid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), mat.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = U.Checkerboard();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(Stencil, Umu, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _cbgrid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  //conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
 | 
					 | 
				
			||||||
//  // Motivation: look at the SchurDiff operator
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(V.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  assert(U.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = Odd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _cbgrid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  //conformable(U.Grid(), mat.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(V.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  assert(U.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = Even;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _grid);  // verifies full grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _cbgrid);    // verifies half grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());  // drops the cb check
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(in.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  out.Checkerboard() = Odd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _cbgrid);    // verifies half grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());  // drops the cb check
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(in.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  out.Checkerboard() = Even;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  DhopDir(in, out, dir, disp);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  DhopDirAll(in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
////
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Compressor compressor(DaggerNo);
 | 
					 | 
				
			||||||
//  Stencil.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int skip = (disp == 1) ? 0 : 1;
 | 
					 | 
				
			||||||
//  int dirdisp = dir + skip * 4;
 | 
					 | 
				
			||||||
//  int gamma = dir + (1 - skip) * 4;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Compressor compressor(DaggerNo);
 | 
					 | 
				
			||||||
//  Stencil.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert((out.size()==8)||(out.size()==9));
 | 
					 | 
				
			||||||
//  for(int dir=0;dir<Nd;dir++){
 | 
					 | 
				
			||||||
//    for(int disp=-1;disp<=1;disp+=2){
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      int skip = (disp == 1) ? 0 : 1;
 | 
					 | 
				
			||||||
//      int dirdisp = dir + skip * 4;
 | 
					 | 
				
			||||||
//      int gamma = dir + (1 - skip) * 4;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
 | 
					 | 
				
			||||||
//    }
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  int Ls=1;
 | 
					 | 
				
			||||||
//  uint64_t Nsite=in.oSites();
 | 
					 | 
				
			||||||
//  Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//                                       DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                       const FermionField &in,
 | 
					 | 
				
			||||||
//                                       FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//#ifdef GRID_OMP
 | 
					 | 
				
			||||||
//  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
 | 
					 | 
				
			||||||
//    DhopInternalOverlappedComms(st,lo,U,in,out,dag);
 | 
					 | 
				
			||||||
//  else
 | 
					 | 
				
			||||||
//#endif
 | 
					 | 
				
			||||||
//    DhopInternalSerial(st,lo,U,in,out,dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//						      DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//						      const FermionField &in,
 | 
					 | 
				
			||||||
//						      FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GRID_TRACE("DhopOverlapped");
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//  int len =  U.Grid()->oSites();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Start comms  // Gather intranode and extra node differentiated??
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  std::vector<std::vector<CommsRequest_t> > requests;
 | 
					 | 
				
			||||||
//  st.Prepare();
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Gather");
 | 
					 | 
				
			||||||
//    st.HaloGather(in,compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tracePush("Communication");
 | 
					 | 
				
			||||||
//  st.CommunicateBegin(requests);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Overlap with comms
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("MergeSHM");
 | 
					 | 
				
			||||||
//    st.CommsMergeSHM(compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // do the compute interior
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  int Opt = WilsonKernelsStatic::Opt;
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDagInterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopInterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Complete comms
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  st.CommunicateComplete(requests);
 | 
					 | 
				
			||||||
//  tracePop("Communication");
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Merge");
 | 
					 | 
				
			||||||
//    st.CommsMerge(compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // do the compute exterior
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDagExterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopExterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
////
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//                                       DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                       const FermionField &in,
 | 
					 | 
				
			||||||
//                                       FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GRID_TRACE("DhopSerial");
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("HaloExchange");
 | 
					 | 
				
			||||||
//    st.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int Opt = WilsonKernelsStatic::Opt;
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDag");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Dhop");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
///*Change ends */
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
///*******************************************************************************
 | 
					 | 
				
			||||||
// * Conserved current utilities for Wilson fermions, for contracting propagators
 | 
					 | 
				
			||||||
// * to make a conserved current sink or inserting the conserved current
 | 
					 | 
				
			||||||
// * sequentially.
 | 
					 | 
				
			||||||
// ******************************************************************************/
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &q_in_2,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &q_out,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &src,
 | 
					 | 
				
			||||||
//                                                   Current curr_type,
 | 
					 | 
				
			||||||
//                                                   unsigned int mu)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if(curr_type != Current::Vector)
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
 | 
					 | 
				
			||||||
//    exit(1);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Gamma g5(Gamma::Algebra::Gamma5);
 | 
					 | 
				
			||||||
//  conformable(_grid, q_in_1.Grid());
 | 
					 | 
				
			||||||
//  conformable(_grid, q_in_2.Grid());
 | 
					 | 
				
			||||||
//  conformable(_grid, q_out.Grid());
 | 
					 | 
				
			||||||
//  auto UGrid= this->GaugeGrid();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  PropagatorField tmp_shifted(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField g5Lg5(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField R(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField gmuR(UGrid);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
//  };
 | 
					 | 
				
			||||||
//  Gamma gmu=Gamma(Gmu[mu]);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  g5Lg5=g5*q_in_1*g5;
 | 
					 | 
				
			||||||
//  tmp_shifted=Cshift(q_in_2,mu,1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(R,this->Umu,tmp_shifted,mu);
 | 
					 | 
				
			||||||
//  gmuR=gmu*R;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  q_out=adj(g5Lg5)*R;
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*gmuR;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tmp_shifted=Cshift(q_in_1,mu,1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu);
 | 
					 | 
				
			||||||
//  g5Lg5=g5*g5Lg5*g5;
 | 
					 | 
				
			||||||
//  R=q_in_2;
 | 
					 | 
				
			||||||
//  gmuR=gmu*R;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*R;
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*gmuR;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
template <class Impl>
 | 
					 | 
				
			||||||
void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
 | 
					 | 
				
			||||||
                                              PropagatorField &q_out,
 | 
					 | 
				
			||||||
                                              PropagatorField &src,
 | 
					 | 
				
			||||||
                                              Current curr_type,
 | 
					 | 
				
			||||||
                                              unsigned int mu,
 | 
					 | 
				
			||||||
                                              unsigned int tmin,
 | 
					 | 
				
			||||||
                                              unsigned int tmax,
 | 
					 | 
				
			||||||
					      ComplexField &lattice_cmplx)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  if(curr_type != Current::Vector)
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
 | 
					 | 
				
			||||||
    exit(1);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int tshift = (mu == Nd-1) ? 1 : 0;
 | 
					 | 
				
			||||||
  unsigned int LLt    = GridDefaultLatt()[Tp];
 | 
					 | 
				
			||||||
  conformable(_grid, q_in.Grid());
 | 
					 | 
				
			||||||
  conformable(_grid, q_out.Grid());
 | 
					 | 
				
			||||||
  auto UGrid= this->GaugeGrid();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  PropagatorField tmp(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField Utmp(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField L(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField zz (UGrid);
 | 
					 | 
				
			||||||
  zz=Zero();
 | 
					 | 
				
			||||||
  LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
  Gamma gmu=Gamma(Gmu[mu]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  tmp = Cshift(q_in,mu,1);
 | 
					 | 
				
			||||||
  Impl::multLinkField(Utmp,this->Umu,tmp,mu);
 | 
					 | 
				
			||||||
  tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop
 | 
					 | 
				
			||||||
  tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
 | 
					 | 
				
			||||||
//  q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tmp = q_in *lattice_cmplx;
 | 
					 | 
				
			||||||
//  tmp = Cshift(tmp,mu,-1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
 | 
					 | 
				
			||||||
//  tmp = -( Utmp + gmu*Utmp );
 | 
					 | 
				
			||||||
//  // Mask the time
 | 
					 | 
				
			||||||
//  if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
 | 
					 | 
				
			||||||
//    unsigned int t0 = 0;
 | 
					 | 
				
			||||||
//    tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    tmp = where((lcoor>=tmin+tshift),tmp,zz);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Explicit instantiation of the WilsonFermion template for WilsonImplD.
template class WilsonFermion<WilsonImplD>; 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					 | 
				
			||||||
@@ -1,615 +0,0 @@
 | 
				
			|||||||
/*************************************************************************************
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Grid physics library, www.github.com/paboyle/Grid
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Copyright (C) 2022
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
					 | 
				
			||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This program is free software; you can redistribute it and/or modify
 | 
					 | 
				
			||||||
it under the terms of the GNU General Public License as published by
 | 
					 | 
				
			||||||
the Free Software Foundation; either version 2 of the License, or
 | 
					 | 
				
			||||||
(at your option) any later version.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This program is distributed in the hope that it will be useful,
 | 
					 | 
				
			||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					 | 
				
			||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					 | 
				
			||||||
GNU General Public License for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You should have received a copy of the GNU General Public License along
 | 
					 | 
				
			||||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
					 | 
				
			||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
See the full license in the file "LICENSE" in the top level distribution
 | 
					 | 
				
			||||||
directory
 | 
					 | 
				
			||||||
*************************************************************************************/
 | 
					 | 
				
			||||||
			   /*  END LEGAL */
 | 
					 | 
				
			||||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
					 | 
				
			||||||
#include <Grid/qcd/action/fermion/WilsonFermion.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/////////////////////////////////
 | 
					 | 
				
			||||||
// Constructor and gauge import
 | 
					 | 
				
			||||||
/////////////////////////////////
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
 | 
					 | 
				
			||||||
//                                   GridRedBlackCartesian &Hgrid, RealD _mass,
 | 
					 | 
				
			||||||
//                                   const ImplParams &p,
 | 
					 | 
				
			||||||
//                                   const WilsonAnisotropyCoefficients &anis)
 | 
					 | 
				
			||||||
//  :
 | 
					 | 
				
			||||||
//    Kernels(p),
 | 
					 | 
				
			||||||
//    _grid(&Fgrid),
 | 
					 | 
				
			||||||
//    _cbgrid(&Hgrid),
 | 
					 | 
				
			||||||
//    Stencil(&Fgrid, npoint, Even, directions, displacements,p),
 | 
					 | 
				
			||||||
//    StencilEven(&Hgrid, npoint, Even, directions,displacements,p),  // source is Even
 | 
					 | 
				
			||||||
//    StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p),  // source is Odd
 | 
					 | 
				
			||||||
//    mass(_mass),
 | 
					 | 
				
			||||||
//    Lebesgue(_grid),
 | 
					 | 
				
			||||||
//    LebesgueEvenOdd(_cbgrid),
 | 
					 | 
				
			||||||
//    Umu(&Fgrid),
 | 
					 | 
				
			||||||
//    UmuEven(&Hgrid),
 | 
					 | 
				
			||||||
//    UmuOdd(&Hgrid),
 | 
					 | 
				
			||||||
//      _tmp(&Hgrid),
 | 
					 | 
				
			||||||
//      anisotropyCoeff(anis)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Stencil.lo     = &Lebesgue;
 | 
					 | 
				
			||||||
//  StencilEven.lo = &LebesgueEvenOdd;
 | 
					 | 
				
			||||||
//  StencilOdd.lo  = &LebesgueEvenOdd;
 | 
					 | 
				
			||||||
//  // Allocate the required comms buffer
 | 
					 | 
				
			||||||
//  ImportGauge(_Umu);
 | 
					 | 
				
			||||||
//  if  (anisotropyCoeff.isAnisotropic){
 | 
					 | 
				
			||||||
//    diag_mass = mass + 1.0 + (Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    diag_mass = 4.0 + mass;
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int vol4;
 | 
					 | 
				
			||||||
//  vol4=Fgrid.oSites();
 | 
					 | 
				
			||||||
//  Stencil.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//  vol4=Hgrid.oSites();
 | 
					 | 
				
			||||||
//  StencilEven.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//  StencilOdd.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GaugeField HUmu(_Umu.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  //Here multiply the anisotropy coefficients
 | 
					 | 
				
			||||||
//  if (anisotropyCoeff.isAnisotropic)
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    for (int mu = 0; mu < Nd; mu++)
 | 
					 | 
				
			||||||
//    {
 | 
					 | 
				
			||||||
//      GaugeLinkField U_dir = (-0.5)*PeekIndex<LorentzIndex>(_Umu, mu);
 | 
					 | 
				
			||||||
//      if (mu != anisotropyCoeff.t_direction)
 | 
					 | 
				
			||||||
//        U_dir *= (anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      PokeIndex<LorentzIndex>(HUmu, U_dir, mu);
 | 
					 | 
				
			||||||
//    }
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  else
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    HUmu = _Umu * (-0.5);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  Impl::DoubleStore(GaugeGrid(), Umu, HUmu);
 | 
					 | 
				
			||||||
//  pickCheckerboard(Even, UmuEven, Umu);
 | 
					 | 
				
			||||||
//  pickCheckerboard(Odd, UmuOdd, Umu);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
///////////////////////////////
 | 
					 | 
				
			||||||
//// Implement the interface
 | 
					 | 
				
			||||||
///////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Dhop(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  axpy(out, diag_mass, in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Dhop(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  axpy(out, diag_mass, in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if (in.Checkerboard() == Odd) {
 | 
					 | 
				
			||||||
//    DhopEO(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    DhopOE(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if (in.Checkerboard() == Odd) {
 | 
					 | 
				
			||||||
//    DhopEO(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    DhopOE(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  typename FermionField::scalar_type scal(diag_mass);
 | 
					 | 
				
			||||||
//  out = scal * in;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Mooee(in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  out = (1.0/(diag_mass))*in;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  MooeeInv(in,out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m,std::vector<double> twist)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  typedef typename FermionField::vector_type vector_type;
 | 
					 | 
				
			||||||
//  typedef typename FermionField::scalar_type ScalComplex;
 | 
					 | 
				
			||||||
//  typedef Lattice<iSinglet<vector_type> > LatComplex;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  // what type LatticeComplex
 | 
					 | 
				
			||||||
//  conformable(_grid,out.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaT
 | 
					 | 
				
			||||||
//  };
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Coordinate latt_size   = _grid->_fdimensions;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  FermionField   num  (_grid); num  = Zero();
 | 
					 | 
				
			||||||
//  LatComplex    wilson(_grid); wilson= Zero();
 | 
					 | 
				
			||||||
//  LatComplex     one  (_grid); one = ScalComplex(1.0,0.0);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  LatComplex denom(_grid); denom= Zero();
 | 
					 | 
				
			||||||
//  LatComplex kmu(_grid);
 | 
					 | 
				
			||||||
//  ScalComplex ci(0.0,1.0);
 | 
					 | 
				
			||||||
//  // momphase = n * 2pi / L
 | 
					 | 
				
			||||||
//  for(int mu=0;mu<Nd;mu++) {
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    LatticeCoordinate(kmu,mu);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    kmu = TwoPiL * kmu;
 | 
					 | 
				
			||||||
//    kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    wilson = wilson + 2.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in);    // derivative term
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    denom=denom + sin(kmu)*sin(kmu);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  wilson = wilson + _m;     // 2 sin^2 k/2 + m
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  num   = num + wilson*in;     // -i gmu sin k + 2 sin^2 k/2 + m
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  denom= denom+wilson*wilson; // sin^2 k + (2 sin^2 k/2 + m)^2
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  denom= one/denom;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ]
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
/////////////////////////////////////
 | 
					 | 
				
			||||||
//// Internal
 | 
					 | 
				
			||||||
/////////////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                        GaugeField &mat, const FermionField &A,
 | 
					 | 
				
			||||||
//                                        const FermionField &B, int dag) {
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  FermionField Btilde(B.Grid());
 | 
					 | 
				
			||||||
//  FermionField Atilde(B.Grid());
 | 
					 | 
				
			||||||
//  Atilde = A;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  st.HaloExchange(B, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  for (int mu = 0; mu < Nd; mu++) {
 | 
					 | 
				
			||||||
//    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    // Flip gamma (1+g)<->(1-g) if dag
 | 
					 | 
				
			||||||
//    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    int gamma = mu;
 | 
					 | 
				
			||||||
//    if (!dag) gamma += Nd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    int Ls=1;
 | 
					 | 
				
			||||||
//    Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    //////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    // spin trace outer product
 | 
					 | 
				
			||||||
//    //////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    Impl::InsertForce4D(mat, Btilde, Atilde, mu);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _grid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), mat.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = U.Checkerboard();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(Stencil, Umu, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _cbgrid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  //conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
 | 
					 | 
				
			||||||
//  // Motivation: look at the SchurDiff operator
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(V.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  assert(U.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = Odd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _cbgrid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  //conformable(U.Grid(), mat.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(V.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  assert(U.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = Even;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _grid);  // verifies full grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _cbgrid);    // verifies half grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());  // drops the cb check
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(in.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  out.Checkerboard() = Odd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _cbgrid);    // verifies half grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());  // drops the cb check
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(in.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  out.Checkerboard() = Even;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  DhopDir(in, out, dir, disp);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  DhopDirAll(in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
////
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Compressor compressor(DaggerNo);
 | 
					 | 
				
			||||||
//  Stencil.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int skip = (disp == 1) ? 0 : 1;
 | 
					 | 
				
			||||||
//  int dirdisp = dir + skip * 4;
 | 
					 | 
				
			||||||
//  int gamma = dir + (1 - skip) * 4;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Compressor compressor(DaggerNo);
 | 
					 | 
				
			||||||
//  Stencil.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert((out.size()==8)||(out.size()==9));
 | 
					 | 
				
			||||||
//  for(int dir=0;dir<Nd;dir++){
 | 
					 | 
				
			||||||
//    for(int disp=-1;disp<=1;disp+=2){
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      int skip = (disp == 1) ? 0 : 1;
 | 
					 | 
				
			||||||
//      int dirdisp = dir + skip * 4;
 | 
					 | 
				
			||||||
//      int gamma = dir + (1 - skip) * 4;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
 | 
					 | 
				
			||||||
//    }
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  int Ls=1;
 | 
					 | 
				
			||||||
//  uint64_t Nsite=in.oSites();
 | 
					 | 
				
			||||||
//  Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//                                       DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                       const FermionField &in,
 | 
					 | 
				
			||||||
//                                       FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//#ifdef GRID_OMP
 | 
					 | 
				
			||||||
//  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
 | 
					 | 
				
			||||||
//    DhopInternalOverlappedComms(st,lo,U,in,out,dag);
 | 
					 | 
				
			||||||
//  else
 | 
					 | 
				
			||||||
//#endif
 | 
					 | 
				
			||||||
//    DhopInternalSerial(st,lo,U,in,out,dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//						      DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//						      const FermionField &in,
 | 
					 | 
				
			||||||
//						      FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GRID_TRACE("DhopOverlapped");
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//  int len =  U.Grid()->oSites();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Start comms  // Gather intranode and extra node differentiated??
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  std::vector<std::vector<CommsRequest_t> > requests;
 | 
					 | 
				
			||||||
//  st.Prepare();
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Gather");
 | 
					 | 
				
			||||||
//    st.HaloGather(in,compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tracePush("Communication");
 | 
					 | 
				
			||||||
//  st.CommunicateBegin(requests);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Overlap with comms
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("MergeSHM");
 | 
					 | 
				
			||||||
//    st.CommsMergeSHM(compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // do the compute interior
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  int Opt = WilsonKernelsStatic::Opt;
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDagInterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopInterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Complete comms
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  st.CommunicateComplete(requests);
 | 
					 | 
				
			||||||
//  tracePop("Communication");
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Merge");
 | 
					 | 
				
			||||||
//    st.CommsMerge(compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // do the compute exterior
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDagExterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopExterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
////
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//                                       DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                       const FermionField &in,
 | 
					 | 
				
			||||||
//                                       FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GRID_TRACE("DhopSerial");
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("HaloExchange");
 | 
					 | 
				
			||||||
//    st.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int Opt = WilsonKernelsStatic::Opt;
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDag");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Dhop");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
///*Change ends */
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
///*******************************************************************************
 | 
					 | 
				
			||||||
// * Conserved current utilities for Wilson fermions, for contracting propagators
 | 
					 | 
				
			||||||
// * to make a conserved current sink or inserting the conserved current
 | 
					 | 
				
			||||||
// * sequentially.
 | 
					 | 
				
			||||||
// ******************************************************************************/
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &q_in_2,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &q_out,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &src,
 | 
					 | 
				
			||||||
//                                                   Current curr_type,
 | 
					 | 
				
			||||||
//                                                   unsigned int mu)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if(curr_type != Current::Vector)
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
 | 
					 | 
				
			||||||
//    exit(1);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Gamma g5(Gamma::Algebra::Gamma5);
 | 
					 | 
				
			||||||
//  conformable(_grid, q_in_1.Grid());
 | 
					 | 
				
			||||||
//  conformable(_grid, q_in_2.Grid());
 | 
					 | 
				
			||||||
//  conformable(_grid, q_out.Grid());
 | 
					 | 
				
			||||||
//  auto UGrid= this->GaugeGrid();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  PropagatorField tmp_shifted(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField g5Lg5(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField R(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField gmuR(UGrid);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
//  };
 | 
					 | 
				
			||||||
//  Gamma gmu=Gamma(Gmu[mu]);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  g5Lg5=g5*q_in_1*g5;
 | 
					 | 
				
			||||||
//  tmp_shifted=Cshift(q_in_2,mu,1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(R,this->Umu,tmp_shifted,mu);
 | 
					 | 
				
			||||||
//  gmuR=gmu*R;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  q_out=adj(g5Lg5)*R;
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*gmuR;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tmp_shifted=Cshift(q_in_1,mu,1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu);
 | 
					 | 
				
			||||||
//  g5Lg5=g5*g5Lg5*g5;
 | 
					 | 
				
			||||||
//  R=q_in_2;
 | 
					 | 
				
			||||||
//  gmuR=gmu*R;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*R;
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*gmuR;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
template <class Impl>
 | 
					 | 
				
			||||||
void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
 | 
					 | 
				
			||||||
                                              PropagatorField &q_out,
 | 
					 | 
				
			||||||
                                              PropagatorField &src,
 | 
					 | 
				
			||||||
                                              Current curr_type,
 | 
					 | 
				
			||||||
                                              unsigned int mu,
 | 
					 | 
				
			||||||
                                              unsigned int tmin,
 | 
					 | 
				
			||||||
                                              unsigned int tmax,
 | 
					 | 
				
			||||||
					      ComplexField &lattice_cmplx)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  if(curr_type != Current::Vector)
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
 | 
					 | 
				
			||||||
    exit(1);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int tshift = (mu == Nd-1) ? 1 : 0;
 | 
					 | 
				
			||||||
  unsigned int LLt    = GridDefaultLatt()[Tp];
 | 
					 | 
				
			||||||
  conformable(_grid, q_in.Grid());
 | 
					 | 
				
			||||||
  conformable(_grid, q_out.Grid());
 | 
					 | 
				
			||||||
  auto UGrid= this->GaugeGrid();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  PropagatorField tmp(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField Utmp(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField L(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField zz (UGrid);
 | 
					 | 
				
			||||||
  zz=Zero();
 | 
					 | 
				
			||||||
  LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
  Gamma gmu=Gamma(Gmu[mu]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  tmp = Cshift(q_in,mu,1);
 | 
					 | 
				
			||||||
  Impl::multLinkField(Utmp,this->Umu,tmp,mu);
 | 
					 | 
				
			||||||
  tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop
 | 
					 | 
				
			||||||
  tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
 | 
					 | 
				
			||||||
//  q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tmp = q_in *lattice_cmplx;
 | 
					 | 
				
			||||||
//  tmp = Cshift(tmp,mu,-1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
 | 
					 | 
				
			||||||
//  tmp = -( Utmp + gmu*Utmp );
 | 
					 | 
				
			||||||
//  // Mask the time
 | 
					 | 
				
			||||||
//  if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
 | 
					 | 
				
			||||||
//    unsigned int t0 = 0;
 | 
					 | 
				
			||||||
//    tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    tmp = where((lcoor>=tmin+tshift),tmp,zz);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//template class WilsonFermion<WilsonImplD>; 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					 | 
				
			||||||
@@ -1,615 +0,0 @@
 | 
				
			|||||||
/*************************************************************************************
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Grid physics library, www.github.com/paboyle/Grid
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Copyright (C) 2022
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
					 | 
				
			||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This program is free software; you can redistribute it and/or modify
 | 
					 | 
				
			||||||
it under the terms of the GNU General Public License as published by
 | 
					 | 
				
			||||||
the Free Software Foundation; either version 2 of the License, or
 | 
					 | 
				
			||||||
(at your option) any later version.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This program is distributed in the hope that it will be useful,
 | 
					 | 
				
			||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					 | 
				
			||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					 | 
				
			||||||
GNU General Public License for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You should have received a copy of the GNU General Public License along
 | 
					 | 
				
			||||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
					 | 
				
			||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
See the full license in the file "LICENSE" in the top level distribution
 | 
					 | 
				
			||||||
directory
 | 
					 | 
				
			||||||
*************************************************************************************/
 | 
					 | 
				
			||||||
			   /*  END LEGAL */
 | 
					 | 
				
			||||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
					 | 
				
			||||||
#include <Grid/qcd/action/fermion/WilsonFermion.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/////////////////////////////////
 | 
					 | 
				
			||||||
// Constructor and gauge import
 | 
					 | 
				
			||||||
/////////////////////////////////
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
 | 
					 | 
				
			||||||
//                                   GridRedBlackCartesian &Hgrid, RealD _mass,
 | 
					 | 
				
			||||||
//                                   const ImplParams &p,
 | 
					 | 
				
			||||||
//                                   const WilsonAnisotropyCoefficients &anis)
 | 
					 | 
				
			||||||
//  :
 | 
					 | 
				
			||||||
//    Kernels(p),
 | 
					 | 
				
			||||||
//    _grid(&Fgrid),
 | 
					 | 
				
			||||||
//    _cbgrid(&Hgrid),
 | 
					 | 
				
			||||||
//    Stencil(&Fgrid, npoint, Even, directions, displacements,p),
 | 
					 | 
				
			||||||
//    StencilEven(&Hgrid, npoint, Even, directions,displacements,p),  // source is Even
 | 
					 | 
				
			||||||
//    StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p),  // source is Odd
 | 
					 | 
				
			||||||
//    mass(_mass),
 | 
					 | 
				
			||||||
//    Lebesgue(_grid),
 | 
					 | 
				
			||||||
//    LebesgueEvenOdd(_cbgrid),
 | 
					 | 
				
			||||||
//    Umu(&Fgrid),
 | 
					 | 
				
			||||||
//    UmuEven(&Hgrid),
 | 
					 | 
				
			||||||
//    UmuOdd(&Hgrid),
 | 
					 | 
				
			||||||
//      _tmp(&Hgrid),
 | 
					 | 
				
			||||||
//      anisotropyCoeff(anis)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Stencil.lo     = &Lebesgue;
 | 
					 | 
				
			||||||
//  StencilEven.lo = &LebesgueEvenOdd;
 | 
					 | 
				
			||||||
//  StencilOdd.lo  = &LebesgueEvenOdd;
 | 
					 | 
				
			||||||
//  // Allocate the required comms buffer
 | 
					 | 
				
			||||||
//  ImportGauge(_Umu);
 | 
					 | 
				
			||||||
//  if  (anisotropyCoeff.isAnisotropic){
 | 
					 | 
				
			||||||
//    diag_mass = mass + 1.0 + (Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    diag_mass = 4.0 + mass;
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int vol4;
 | 
					 | 
				
			||||||
//  vol4=Fgrid.oSites();
 | 
					 | 
				
			||||||
//  Stencil.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//  vol4=Hgrid.oSites();
 | 
					 | 
				
			||||||
//  StencilEven.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//  StencilOdd.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GaugeField HUmu(_Umu.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  //Here multiply the anisotropy coefficients
 | 
					 | 
				
			||||||
//  if (anisotropyCoeff.isAnisotropic)
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    for (int mu = 0; mu < Nd; mu++)
 | 
					 | 
				
			||||||
//    {
 | 
					 | 
				
			||||||
//      GaugeLinkField U_dir = (-0.5)*PeekIndex<LorentzIndex>(_Umu, mu);
 | 
					 | 
				
			||||||
//      if (mu != anisotropyCoeff.t_direction)
 | 
					 | 
				
			||||||
//        U_dir *= (anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      PokeIndex<LorentzIndex>(HUmu, U_dir, mu);
 | 
					 | 
				
			||||||
//    }
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  else
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    HUmu = _Umu * (-0.5);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  Impl::DoubleStore(GaugeGrid(), Umu, HUmu);
 | 
					 | 
				
			||||||
//  pickCheckerboard(Even, UmuEven, Umu);
 | 
					 | 
				
			||||||
//  pickCheckerboard(Odd, UmuOdd, Umu);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
///////////////////////////////
 | 
					 | 
				
			||||||
//// Implement the interface
 | 
					 | 
				
			||||||
///////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Dhop(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  axpy(out, diag_mass, in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Dhop(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  axpy(out, diag_mass, in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if (in.Checkerboard() == Odd) {
 | 
					 | 
				
			||||||
//    DhopEO(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    DhopOE(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if (in.Checkerboard() == Odd) {
 | 
					 | 
				
			||||||
//    DhopEO(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    DhopOE(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  typename FermionField::scalar_type scal(diag_mass);
 | 
					 | 
				
			||||||
//  out = scal * in;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Mooee(in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  out = (1.0/(diag_mass))*in;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  MooeeInv(in,out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m,std::vector<double> twist)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  typedef typename FermionField::vector_type vector_type;
 | 
					 | 
				
			||||||
//  typedef typename FermionField::scalar_type ScalComplex;
 | 
					 | 
				
			||||||
//  typedef Lattice<iSinglet<vector_type> > LatComplex;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  // what type LatticeComplex
 | 
					 | 
				
			||||||
//  conformable(_grid,out.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaT
 | 
					 | 
				
			||||||
//  };
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Coordinate latt_size   = _grid->_fdimensions;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  FermionField   num  (_grid); num  = Zero();
 | 
					 | 
				
			||||||
//  LatComplex    wilson(_grid); wilson= Zero();
 | 
					 | 
				
			||||||
//  LatComplex     one  (_grid); one = ScalComplex(1.0,0.0);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  LatComplex denom(_grid); denom= Zero();
 | 
					 | 
				
			||||||
//  LatComplex kmu(_grid);
 | 
					 | 
				
			||||||
//  ScalComplex ci(0.0,1.0);
 | 
					 | 
				
			||||||
//  // momphase = n * 2pi / L
 | 
					 | 
				
			||||||
//  for(int mu=0;mu<Nd;mu++) {
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    LatticeCoordinate(kmu,mu);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    kmu = TwoPiL * kmu;
 | 
					 | 
				
			||||||
//    kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    wilson = wilson + 2.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in);    // derivative term
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    denom=denom + sin(kmu)*sin(kmu);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  wilson = wilson + _m;     // 2 sin^2 k/2 + m
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  num   = num + wilson*in;     // -i gmu sin k + 2 sin^2 k/2 + m
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  denom= denom+wilson*wilson; // sin^2 k + (2 sin^2 k/2 + m)^2
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  denom= one/denom;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ]
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
/////////////////////////////////////
 | 
					 | 
				
			||||||
//// Internal
 | 
					 | 
				
			||||||
/////////////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                        GaugeField &mat, const FermionField &A,
 | 
					 | 
				
			||||||
//                                        const FermionField &B, int dag) {
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  FermionField Btilde(B.Grid());
 | 
					 | 
				
			||||||
//  FermionField Atilde(B.Grid());
 | 
					 | 
				
			||||||
//  Atilde = A;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  st.HaloExchange(B, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  for (int mu = 0; mu < Nd; mu++) {
 | 
					 | 
				
			||||||
//    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    // Flip gamma (1+g)<->(1-g) if dag
 | 
					 | 
				
			||||||
//    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    int gamma = mu;
 | 
					 | 
				
			||||||
//    if (!dag) gamma += Nd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    int Ls=1;
 | 
					 | 
				
			||||||
//    Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    //////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    // spin trace outer product
 | 
					 | 
				
			||||||
//    //////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    Impl::InsertForce4D(mat, Btilde, Atilde, mu);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _grid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), mat.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = U.Checkerboard();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(Stencil, Umu, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _cbgrid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  //conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
 | 
					 | 
				
			||||||
//  // Motivation: look at the SchurDiff operator
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(V.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  assert(U.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = Odd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _cbgrid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  //conformable(U.Grid(), mat.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(V.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  assert(U.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = Even;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _grid);  // verifies full grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _cbgrid);    // verifies half grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());  // drops the cb check
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(in.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  out.Checkerboard() = Odd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _cbgrid);    // verifies half grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());  // drops the cb check
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(in.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  out.Checkerboard() = Even;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  DhopDir(in, out, dir, disp);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  DhopDirAll(in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
////
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Compressor compressor(DaggerNo);
 | 
					 | 
				
			||||||
//  Stencil.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int skip = (disp == 1) ? 0 : 1;
 | 
					 | 
				
			||||||
//  int dirdisp = dir + skip * 4;
 | 
					 | 
				
			||||||
//  int gamma = dir + (1 - skip) * 4;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Compressor compressor(DaggerNo);
 | 
					 | 
				
			||||||
//  Stencil.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert((out.size()==8)||(out.size()==9));
 | 
					 | 
				
			||||||
//  for(int dir=0;dir<Nd;dir++){
 | 
					 | 
				
			||||||
//    for(int disp=-1;disp<=1;disp+=2){
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      int skip = (disp == 1) ? 0 : 1;
 | 
					 | 
				
			||||||
//      int dirdisp = dir + skip * 4;
 | 
					 | 
				
			||||||
//      int gamma = dir + (1 - skip) * 4;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
 | 
					 | 
				
			||||||
//    }
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  int Ls=1;
 | 
					 | 
				
			||||||
//  uint64_t Nsite=in.oSites();
 | 
					 | 
				
			||||||
//  Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//                                       DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                       const FermionField &in,
 | 
					 | 
				
			||||||
//                                       FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//#ifdef GRID_OMP
 | 
					 | 
				
			||||||
//  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
 | 
					 | 
				
			||||||
//    DhopInternalOverlappedComms(st,lo,U,in,out,dag);
 | 
					 | 
				
			||||||
//  else
 | 
					 | 
				
			||||||
//#endif
 | 
					 | 
				
			||||||
//    DhopInternalSerial(st,lo,U,in,out,dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//						      DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//						      const FermionField &in,
 | 
					 | 
				
			||||||
//						      FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GRID_TRACE("DhopOverlapped");
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//  int len =  U.Grid()->oSites();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Start comms  // Gather intranode and extra node differentiated??
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  std::vector<std::vector<CommsRequest_t> > requests;
 | 
					 | 
				
			||||||
//  st.Prepare();
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Gather");
 | 
					 | 
				
			||||||
//    st.HaloGather(in,compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tracePush("Communication");
 | 
					 | 
				
			||||||
//  st.CommunicateBegin(requests);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Overlap with comms
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("MergeSHM");
 | 
					 | 
				
			||||||
//    st.CommsMergeSHM(compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // do the compute interior
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  int Opt = WilsonKernelsStatic::Opt;
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDagInterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopInterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Complete comms
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  st.CommunicateComplete(requests);
 | 
					 | 
				
			||||||
//  tracePop("Communication");
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Merge");
 | 
					 | 
				
			||||||
//    st.CommsMerge(compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // do the compute exterior
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDagExterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopExterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
////
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//                                       DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                       const FermionField &in,
 | 
					 | 
				
			||||||
//                                       FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GRID_TRACE("DhopSerial");
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("HaloExchange");
 | 
					 | 
				
			||||||
//    st.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int Opt = WilsonKernelsStatic::Opt;
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDag");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Dhop");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
///*Change ends */
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
///*******************************************************************************
 | 
					 | 
				
			||||||
// * Conserved current utilities for Wilson fermions, for contracting propagators
 | 
					 | 
				
			||||||
// * to make a conserved current sink or inserting the conserved current
 | 
					 | 
				
			||||||
// * sequentially.
 | 
					 | 
				
			||||||
// ******************************************************************************/
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &q_in_2,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &q_out,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &src,
 | 
					 | 
				
			||||||
//                                                   Current curr_type,
 | 
					 | 
				
			||||||
//                                                   unsigned int mu)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if(curr_type != Current::Vector)
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
 | 
					 | 
				
			||||||
//    exit(1);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Gamma g5(Gamma::Algebra::Gamma5);
 | 
					 | 
				
			||||||
//  conformable(_grid, q_in_1.Grid());
 | 
					 | 
				
			||||||
//  conformable(_grid, q_in_2.Grid());
 | 
					 | 
				
			||||||
//  conformable(_grid, q_out.Grid());
 | 
					 | 
				
			||||||
//  auto UGrid= this->GaugeGrid();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  PropagatorField tmp_shifted(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField g5Lg5(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField R(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField gmuR(UGrid);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
//  };
 | 
					 | 
				
			||||||
//  Gamma gmu=Gamma(Gmu[mu]);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  g5Lg5=g5*q_in_1*g5;
 | 
					 | 
				
			||||||
//  tmp_shifted=Cshift(q_in_2,mu,1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(R,this->Umu,tmp_shifted,mu);
 | 
					 | 
				
			||||||
//  gmuR=gmu*R;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  q_out=adj(g5Lg5)*R;
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*gmuR;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tmp_shifted=Cshift(q_in_1,mu,1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu);
 | 
					 | 
				
			||||||
//  g5Lg5=g5*g5Lg5*g5;
 | 
					 | 
				
			||||||
//  R=q_in_2;
 | 
					 | 
				
			||||||
//  gmuR=gmu*R;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*R;
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*gmuR;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
template <class Impl>
 | 
					 | 
				
			||||||
void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
 | 
					 | 
				
			||||||
                                              PropagatorField &q_out,
 | 
					 | 
				
			||||||
                                              PropagatorField &src,
 | 
					 | 
				
			||||||
                                              Current curr_type,
 | 
					 | 
				
			||||||
                                              unsigned int mu,
 | 
					 | 
				
			||||||
                                              unsigned int tmin,
 | 
					 | 
				
			||||||
                                              unsigned int tmax,
 | 
					 | 
				
			||||||
					      ComplexField &lattice_cmplx)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  if(curr_type != Current::Vector)
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
 | 
					 | 
				
			||||||
    exit(1);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int tshift = (mu == Nd-1) ? 1 : 0;
 | 
					 | 
				
			||||||
  unsigned int LLt    = GridDefaultLatt()[Tp];
 | 
					 | 
				
			||||||
  conformable(_grid, q_in.Grid());
 | 
					 | 
				
			||||||
  conformable(_grid, q_out.Grid());
 | 
					 | 
				
			||||||
  auto UGrid= this->GaugeGrid();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  PropagatorField tmp(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField Utmp(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField L(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField zz (UGrid);
 | 
					 | 
				
			||||||
  zz=Zero();
 | 
					 | 
				
			||||||
  LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
  Gamma gmu=Gamma(Gmu[mu]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  tmp = Cshift(q_in,mu,1);
 | 
					 | 
				
			||||||
  Impl::multLinkField(Utmp,this->Umu,tmp,mu);
 | 
					 | 
				
			||||||
  tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop
 | 
					 | 
				
			||||||
//  tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
 | 
					 | 
				
			||||||
//  q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tmp = q_in *lattice_cmplx;
 | 
					 | 
				
			||||||
//  tmp = Cshift(tmp,mu,-1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
 | 
					 | 
				
			||||||
//  tmp = -( Utmp + gmu*Utmp );
 | 
					 | 
				
			||||||
//  // Mask the time
 | 
					 | 
				
			||||||
//  if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
 | 
					 | 
				
			||||||
//    unsigned int t0 = 0;
 | 
					 | 
				
			||||||
//    tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    tmp = where((lcoor>=tmin+tshift),tmp,zz);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Explicit instantiation of the WilsonFermion template for WilsonImplD.
template class WilsonFermion<WilsonImplD>; 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					 | 
				
			||||||
@@ -1,615 +0,0 @@
 | 
				
			|||||||
/*************************************************************************************
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Grid physics library, www.github.com/paboyle/Grid
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Copyright (C) 2022
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
					 | 
				
			||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
					 | 
				
			||||||
Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This program is free software; you can redistribute it and/or modify
 | 
					 | 
				
			||||||
it under the terms of the GNU General Public License as published by
 | 
					 | 
				
			||||||
the Free Software Foundation; either version 2 of the License, or
 | 
					 | 
				
			||||||
(at your option) any later version.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This program is distributed in the hope that it will be useful,
 | 
					 | 
				
			||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					 | 
				
			||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					 | 
				
			||||||
GNU General Public License for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You should have received a copy of the GNU General Public License along
 | 
					 | 
				
			||||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
					 | 
				
			||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
See the full license in the file "LICENSE" in the top level distribution
 | 
					 | 
				
			||||||
directory
 | 
					 | 
				
			||||||
*************************************************************************************/
 | 
					 | 
				
			||||||
			   /*  END LEGAL */
 | 
					 | 
				
			||||||
#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
					 | 
				
			||||||
#include <Grid/qcd/action/fermion/WilsonFermion.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/////////////////////////////////
 | 
					 | 
				
			||||||
// Constructor and gauge import
 | 
					 | 
				
			||||||
/////////////////////////////////
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
 | 
					 | 
				
			||||||
//                                   GridRedBlackCartesian &Hgrid, RealD _mass,
 | 
					 | 
				
			||||||
//                                   const ImplParams &p,
 | 
					 | 
				
			||||||
//                                   const WilsonAnisotropyCoefficients &anis)
 | 
					 | 
				
			||||||
//  :
 | 
					 | 
				
			||||||
//    Kernels(p),
 | 
					 | 
				
			||||||
//    _grid(&Fgrid),
 | 
					 | 
				
			||||||
//    _cbgrid(&Hgrid),
 | 
					 | 
				
			||||||
//    Stencil(&Fgrid, npoint, Even, directions, displacements,p),
 | 
					 | 
				
			||||||
//    StencilEven(&Hgrid, npoint, Even, directions,displacements,p),  // source is Even
 | 
					 | 
				
			||||||
//    StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p),  // source is Odd
 | 
					 | 
				
			||||||
//    mass(_mass),
 | 
					 | 
				
			||||||
//    Lebesgue(_grid),
 | 
					 | 
				
			||||||
//    LebesgueEvenOdd(_cbgrid),
 | 
					 | 
				
			||||||
//    Umu(&Fgrid),
 | 
					 | 
				
			||||||
//    UmuEven(&Hgrid),
 | 
					 | 
				
			||||||
//    UmuOdd(&Hgrid),
 | 
					 | 
				
			||||||
//      _tmp(&Hgrid),
 | 
					 | 
				
			||||||
//      anisotropyCoeff(anis)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Stencil.lo     = &Lebesgue;
 | 
					 | 
				
			||||||
//  StencilEven.lo = &LebesgueEvenOdd;
 | 
					 | 
				
			||||||
//  StencilOdd.lo  = &LebesgueEvenOdd;
 | 
					 | 
				
			||||||
//  // Allocate the required comms buffer
 | 
					 | 
				
			||||||
//  ImportGauge(_Umu);
 | 
					 | 
				
			||||||
//  if  (anisotropyCoeff.isAnisotropic){
 | 
					 | 
				
			||||||
//    diag_mass = mass + 1.0 + (Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    diag_mass = 4.0 + mass;
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int vol4;
 | 
					 | 
				
			||||||
//  vol4=Fgrid.oSites();
 | 
					 | 
				
			||||||
//  Stencil.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//  vol4=Hgrid.oSites();
 | 
					 | 
				
			||||||
//  StencilEven.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//  StencilOdd.BuildSurfaceList(1,vol4);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GaugeField HUmu(_Umu.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  //Here multiply the anisotropy coefficients
 | 
					 | 
				
			||||||
//  if (anisotropyCoeff.isAnisotropic)
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    for (int mu = 0; mu < Nd; mu++)
 | 
					 | 
				
			||||||
//    {
 | 
					 | 
				
			||||||
//      GaugeLinkField U_dir = (-0.5)*PeekIndex<LorentzIndex>(_Umu, mu);
 | 
					 | 
				
			||||||
//      if (mu != anisotropyCoeff.t_direction)
 | 
					 | 
				
			||||||
//        U_dir *= (anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      PokeIndex<LorentzIndex>(HUmu, U_dir, mu);
 | 
					 | 
				
			||||||
//    }
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  else
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    HUmu = _Umu * (-0.5);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  Impl::DoubleStore(GaugeGrid(), Umu, HUmu);
 | 
					 | 
				
			||||||
//  pickCheckerboard(Even, UmuEven, Umu);
 | 
					 | 
				
			||||||
//  pickCheckerboard(Odd, UmuOdd, Umu);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
///////////////////////////////
 | 
					 | 
				
			||||||
//// Implement the interface
 | 
					 | 
				
			||||||
///////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Dhop(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  axpy(out, diag_mass, in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Dhop(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  axpy(out, diag_mass, in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if (in.Checkerboard() == Odd) {
 | 
					 | 
				
			||||||
//    DhopEO(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    DhopOE(in, out, DaggerNo);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if (in.Checkerboard() == Odd) {
 | 
					 | 
				
			||||||
//    DhopEO(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    DhopOE(in, out, DaggerYes);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  typename FermionField::scalar_type scal(diag_mass);
 | 
					 | 
				
			||||||
//  out = scal * in;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  Mooee(in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  out = (1.0/(diag_mass))*in;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//  MooeeInv(in,out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template<class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m,std::vector<double> twist)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  typedef typename FermionField::vector_type vector_type;
 | 
					 | 
				
			||||||
//  typedef typename FermionField::scalar_type ScalComplex;
 | 
					 | 
				
			||||||
//  typedef Lattice<iSinglet<vector_type> > LatComplex;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  // what type LatticeComplex
 | 
					 | 
				
			||||||
//  conformable(_grid,out.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaT
 | 
					 | 
				
			||||||
//  };
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Coordinate latt_size   = _grid->_fdimensions;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  FermionField   num  (_grid); num  = Zero();
 | 
					 | 
				
			||||||
//  LatComplex    wilson(_grid); wilson= Zero();
 | 
					 | 
				
			||||||
//  LatComplex     one  (_grid); one = ScalComplex(1.0,0.0);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  LatComplex denom(_grid); denom= Zero();
 | 
					 | 
				
			||||||
//  LatComplex kmu(_grid);
 | 
					 | 
				
			||||||
//  ScalComplex ci(0.0,1.0);
 | 
					 | 
				
			||||||
//  // momphase = n * 2pi / L
 | 
					 | 
				
			||||||
//  for(int mu=0;mu<Nd;mu++) {
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    LatticeCoordinate(kmu,mu);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    kmu = TwoPiL * kmu;
 | 
					 | 
				
			||||||
//    kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    wilson = wilson + 2.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in);    // derivative term
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    denom=denom + sin(kmu)*sin(kmu);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  wilson = wilson + _m;     // 2 sin^2 k/2 + m
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  num   = num + wilson*in;     // -i gmu sin k + 2 sin^2 k/2 + m
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  denom= denom+wilson*wilson; // sin^2 k + (2 sin^2 k/2 + m)^2
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  denom= one/denom;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ]
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
/////////////////////////////////////
 | 
					 | 
				
			||||||
//// Internal
 | 
					 | 
				
			||||||
/////////////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                        GaugeField &mat, const FermionField &A,
 | 
					 | 
				
			||||||
//                                        const FermionField &B, int dag) {
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  FermionField Btilde(B.Grid());
 | 
					 | 
				
			||||||
//  FermionField Atilde(B.Grid());
 | 
					 | 
				
			||||||
//  Atilde = A;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  st.HaloExchange(B, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  for (int mu = 0; mu < Nd; mu++) {
 | 
					 | 
				
			||||||
//    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    // Flip gamma (1+g)<->(1-g) if dag
 | 
					 | 
				
			||||||
//    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    int gamma = mu;
 | 
					 | 
				
			||||||
//    if (!dag) gamma += Nd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    int Ls=1;
 | 
					 | 
				
			||||||
//    Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    //////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    // spin trace outer product
 | 
					 | 
				
			||||||
//    //////////////////////////////////////////////////
 | 
					 | 
				
			||||||
//    Impl::InsertForce4D(mat, Btilde, Atilde, mu);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _grid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), mat.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = U.Checkerboard();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(Stencil, Umu, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _cbgrid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  //conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido)
 | 
					 | 
				
			||||||
//  // Motivation: look at the SchurDiff operator
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(V.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  assert(U.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = Odd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), _cbgrid);
 | 
					 | 
				
			||||||
//  conformable(U.Grid(), V.Grid());
 | 
					 | 
				
			||||||
//  //conformable(U.Grid(), mat.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(V.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  assert(U.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  mat.Checkerboard() = Even;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _grid);  // verifies full grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  out.Checkerboard() = in.Checkerboard();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _cbgrid);    // verifies half grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());  // drops the cb check
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(in.Checkerboard() == Even);
 | 
					 | 
				
			||||||
//  out.Checkerboard() = Odd;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), _cbgrid);    // verifies half grid
 | 
					 | 
				
			||||||
//  conformable(in.Grid(), out.Grid());  // drops the cb check
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert(in.Checkerboard() == Odd);
 | 
					 | 
				
			||||||
//  out.Checkerboard() = Even;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  DhopDir(in, out, dir, disp);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  DhopDirAll(in, out);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
////
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Compressor compressor(DaggerNo);
 | 
					 | 
				
			||||||
//  Stencil.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int skip = (disp == 1) ? 0 : 1;
 | 
					 | 
				
			||||||
//  int dirdisp = dir + skip * 4;
 | 
					 | 
				
			||||||
//  int gamma = dir + (1 - skip) * 4;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  Compressor compressor(DaggerNo);
 | 
					 | 
				
			||||||
//  Stencil.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  assert((out.size()==8)||(out.size()==9));
 | 
					 | 
				
			||||||
//  for(int dir=0;dir<Nd;dir++){
 | 
					 | 
				
			||||||
//    for(int disp=-1;disp<=1;disp+=2){
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      int skip = (disp == 1) ? 0 : 1;
 | 
					 | 
				
			||||||
//      int dirdisp = dir + skip * 4;
 | 
					 | 
				
			||||||
//      int gamma = dir + (1 - skip) * 4;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//      DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
 | 
					 | 
				
			||||||
//    }
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  int Ls=1;
 | 
					 | 
				
			||||||
//  uint64_t Nsite=in.oSites();
 | 
					 | 
				
			||||||
//  Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//                                       DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                       const FermionField &in,
 | 
					 | 
				
			||||||
//                                       FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//#ifdef GRID_OMP
 | 
					 | 
				
			||||||
//  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
 | 
					 | 
				
			||||||
//    DhopInternalOverlappedComms(st,lo,U,in,out,dag);
 | 
					 | 
				
			||||||
//  else
 | 
					 | 
				
			||||||
//#endif
 | 
					 | 
				
			||||||
//    DhopInternalSerial(st,lo,U,in,out,dag);
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//						      DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//						      const FermionField &in,
 | 
					 | 
				
			||||||
//						      FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GRID_TRACE("DhopOverlapped");
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//  int len =  U.Grid()->oSites();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Start comms  // Gather intranode and extra node differentiated??
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  std::vector<std::vector<CommsRequest_t> > requests;
 | 
					 | 
				
			||||||
//  st.Prepare();
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Gather");
 | 
					 | 
				
			||||||
//    st.HaloGather(in,compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tracePush("Communication");
 | 
					 | 
				
			||||||
//  st.CommunicateBegin(requests);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Overlap with comms
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("MergeSHM");
 | 
					 | 
				
			||||||
//    st.CommsMergeSHM(compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // do the compute interior
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  int Opt = WilsonKernelsStatic::Opt;
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDagInterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopInterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // Complete comms
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  st.CommunicateComplete(requests);
 | 
					 | 
				
			||||||
//  tracePop("Communication");
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Merge");
 | 
					 | 
				
			||||||
//    st.CommsMerge(compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//  // do the compute exterior
 | 
					 | 
				
			||||||
//  /////////////////////////////
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDagExterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopExterior");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
////
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
 | 
					 | 
				
			||||||
//                                       DoubledGaugeField &U,
 | 
					 | 
				
			||||||
//                                       const FermionField &in,
 | 
					 | 
				
			||||||
//                                       FermionField &out, int dag)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  GRID_TRACE("DhopSerial");
 | 
					 | 
				
			||||||
//  assert((dag == DaggerNo) || (dag == DaggerYes));
 | 
					 | 
				
			||||||
//  Compressor compressor(dag);
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    GRID_TRACE("HaloExchange");
 | 
					 | 
				
			||||||
//    st.HaloExchange(in, compressor);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  int Opt = WilsonKernelsStatic::Opt;
 | 
					 | 
				
			||||||
//  if (dag == DaggerYes) {
 | 
					 | 
				
			||||||
//    GRID_TRACE("DhopDag");
 | 
					 | 
				
			||||||
//    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    GRID_TRACE("Dhop");
 | 
					 | 
				
			||||||
//    Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//};
 | 
					 | 
				
			||||||
///*Change ends */
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
///*******************************************************************************
 | 
					 | 
				
			||||||
// * Conserved current utilities for Wilson fermions, for contracting propagators
 | 
					 | 
				
			||||||
// * to make a conserved current sink or inserting the conserved current
 | 
					 | 
				
			||||||
// * sequentially.
 | 
					 | 
				
			||||||
// ******************************************************************************/
 | 
					 | 
				
			||||||
//template <class Impl>
 | 
					 | 
				
			||||||
//void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &q_in_2,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &q_out,
 | 
					 | 
				
			||||||
//                                                   PropagatorField &src,
 | 
					 | 
				
			||||||
//                                                   Current curr_type,
 | 
					 | 
				
			||||||
//                                                   unsigned int mu)
 | 
					 | 
				
			||||||
//{
 | 
					 | 
				
			||||||
//  if(curr_type != Current::Vector)
 | 
					 | 
				
			||||||
//  {
 | 
					 | 
				
			||||||
//    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
 | 
					 | 
				
			||||||
//    exit(1);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  Gamma g5(Gamma::Algebra::Gamma5);
 | 
					 | 
				
			||||||
//  conformable(_grid, q_in_1.Grid());
 | 
					 | 
				
			||||||
//  conformable(_grid, q_in_2.Grid());
 | 
					 | 
				
			||||||
//  conformable(_grid, q_out.Grid());
 | 
					 | 
				
			||||||
//  auto UGrid= this->GaugeGrid();
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  PropagatorField tmp_shifted(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField g5Lg5(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField R(UGrid);
 | 
					 | 
				
			||||||
//  PropagatorField gmuR(UGrid);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//    Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
//    Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
//  };
 | 
					 | 
				
			||||||
//  Gamma gmu=Gamma(Gmu[mu]);
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  g5Lg5=g5*q_in_1*g5;
 | 
					 | 
				
			||||||
//  tmp_shifted=Cshift(q_in_2,mu,1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(R,this->Umu,tmp_shifted,mu);
 | 
					 | 
				
			||||||
//  gmuR=gmu*R;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  q_out=adj(g5Lg5)*R;
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*gmuR;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tmp_shifted=Cshift(q_in_1,mu,1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu);
 | 
					 | 
				
			||||||
//  g5Lg5=g5*g5Lg5*g5;
 | 
					 | 
				
			||||||
//  R=q_in_2;
 | 
					 | 
				
			||||||
//  gmuR=gmu*R;
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*R;
 | 
					 | 
				
			||||||
//  q_out-=adj(g5Lg5)*gmuR;
 | 
					 | 
				
			||||||
//}
 | 
					 | 
				
			||||||
using Impl = WilsonImplD;
 | 
					 | 
				
			||||||
template <class Impl>
 | 
					 | 
				
			||||||
void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
 | 
					 | 
				
			||||||
                                              PropagatorField &q_out,
 | 
					 | 
				
			||||||
                                              PropagatorField &src,
 | 
					 | 
				
			||||||
                                              Current curr_type,
 | 
					 | 
				
			||||||
                                              unsigned int mu,
 | 
					 | 
				
			||||||
                                              unsigned int tmin,
 | 
					 | 
				
			||||||
                                              unsigned int tmax,
 | 
					 | 
				
			||||||
					      ComplexField &lattice_cmplx)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  if(curr_type != Current::Vector)
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
 | 
					 | 
				
			||||||
    exit(1);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int tshift = (mu == Nd-1) ? 1 : 0;
 | 
					 | 
				
			||||||
  unsigned int LLt    = GridDefaultLatt()[Tp];
 | 
					 | 
				
			||||||
  conformable(_grid, q_in.Grid());
 | 
					 | 
				
			||||||
  conformable(_grid, q_out.Grid());
 | 
					 | 
				
			||||||
  auto UGrid= this->GaugeGrid();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  PropagatorField tmp(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField Utmp(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField L(UGrid);
 | 
					 | 
				
			||||||
  PropagatorField zz (UGrid);
 | 
					 | 
				
			||||||
  zz=Zero();
 | 
					 | 
				
			||||||
  LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
  Gamma gmu=Gamma(Gmu[mu]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  tmp = Cshift(q_in,mu,1);
 | 
					 | 
				
			||||||
  Impl::multLinkField(Utmp,this->Umu,tmp,mu);
 | 
					 | 
				
			||||||
  tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop
 | 
					 | 
				
			||||||
  tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
 | 
					 | 
				
			||||||
//  q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
//  tmp = q_in *lattice_cmplx;
 | 
					 | 
				
			||||||
//  tmp = Cshift(tmp,mu,-1);
 | 
					 | 
				
			||||||
//  Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
 | 
					 | 
				
			||||||
//  tmp = -( Utmp + gmu*Utmp );
 | 
					 | 
				
			||||||
//  // Mask the time
 | 
					 | 
				
			||||||
//  if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
 | 
					 | 
				
			||||||
//    unsigned int t0 = 0;
 | 
					 | 
				
			||||||
//    tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
 | 
					 | 
				
			||||||
//  } else {
 | 
					 | 
				
			||||||
//    tmp = where((lcoor>=tmin+tshift),tmp,zz);
 | 
					 | 
				
			||||||
//  }
 | 
					 | 
				
			||||||
//  q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//template class WilsonFermion<WilsonImplD>; 
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					 | 
				
			||||||
@@ -1,586 +0,0 @@
 | 
				
			|||||||
 | 
					 | 
				
			||||||
libWilsonFails.a(WilsonFails.o):	file format elf64-x86-64
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SYMBOL TABLE:
 | 
					 | 
				
			||||||
0000000000000000 l    df *ABS*	0000000000000000 WilsonFermionInstantiationWilsonImplD.cc
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text	0000000000000000 .text
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4GridEv	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4GridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_	0000000000000000 .text._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_	0000000000000000 .gcc_except_table._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_	0000000000000000 GCC_except_table1
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MdiagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MdiagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev	0000000000000000 GCC_except_table3
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED0Ev	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED0Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE12RedBlackGridEv	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE12RedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MassEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MassEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE7ConstEEEv	0000000000000000 .text._ZN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE7ConstEEEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11isTrivialEEEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11isTrivialEEEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE3tmpEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE3tmpEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14DirichletBlockERKNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14DirichletBlockERKNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11FermionGridEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11FermionGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19FermionRedBlackGridEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19FermionRedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9GaugeGridEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9GaugeGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE17GaugeRedBlackGridEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE17GaugeRedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6MDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6MDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MoeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MoeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeoDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeoDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l       .rodata.cst8	0000000000000000 .LCPI20_0
 | 
					 | 
				
			||||||
0000000000000008 l       .rodata.cst8	0000000000000000 .LCPI20_1
 | 
					 | 
				
			||||||
0000000000000000 l       .rodata.cst16	0000000000000000 .LCPI20_2
 | 
					 | 
				
			||||||
0000000000000010 l       .rodata.cst16	0000000000000000 .LCPI20_3
 | 
					 | 
				
			||||||
0000000000000020 l       .rodata.cst16	0000000000000000 .LCPI20_4
 | 
					 | 
				
			||||||
0000000000000030 l       .rodata.cst16	0000000000000000 .LCPI20_5
 | 
					 | 
				
			||||||
0000000000000040 l       .rodata.cst16	0000000000000000 .LCPI20_6
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE	0000000000000000 .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE	0000000000000000 GCC_except_table20
 | 
					 | 
				
			||||||
0000000000000010 l       .rodata.cst8	0000000000000000 .LCPI21_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d	0000000000000000 .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d	0000000000000000 GCC_except_table21
 | 
					 | 
				
			||||||
0000000000000050 l       .rodata.cst16	0000000000000000 .LCPI22_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 GCC_except_table22
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DminusERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DminusERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DminusDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DminusDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ImportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ImportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23ImportUnphysicalFermionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23ImportUnphysicalFermionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE29ExportPhysicalFermionSolutionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE29ExportPhysicalFermionSolutionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ExportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ExportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4GridlsERSoRNS_6LoggerE	0000000000000000 .text._ZN4GridlsERSoRNS_6LoggerE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4GridlsERSoRNS_6LoggerE	0000000000000000 .gcc_except_table._ZN4GridlsERSoRNS_6LoggerE
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4GridlsERSoRNS_6LoggerE	0000000000000000 GCC_except_table31
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 GCC_except_table35
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii	0000000000000000 .text._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii	0000000000000000 .gcc_except_table._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii	0000000000000000 GCC_except_table36
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSEOSD_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSEOSD_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 GCC_except_table38
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i	0000000000000000 .text._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i
 | 
					 | 
				
			||||||
0000000000000000 l     F .text	000000000000010d .omp_outlined.
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i	0000000000000000 .gcc_except_table._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i	0000000000000000 GCC_except_table39
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000001160 l     F .text	00000000000000e5 .omp_outlined..44
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_12TrinaryWhereENS0_INS1_INS3_IjNS7_IjEEEEEEEESD_SD_EERSD_RKNS_24LatticeTrinaryExpressionIT_T0_T1_T2_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_12TrinaryWhereENS0_INS1_INS3_IjNS7_IjEEEEEEEESD_SD_EERSD_RKNS_24LatticeTrinaryExpressionIT_T0_T1_T2_EE
 | 
					 | 
				
			||||||
000000000000bf90 l     F .text	00000000000000e5 .omp_outlined..52
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEED2Ev	0000000000000000 GCC_except_table42
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev	0000000000000000 GCC_except_table43
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid6Logger10backgroundB5cxx11Ev	0000000000000000 .text._ZN4Grid6Logger10backgroundB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid6Logger10backgroundB5cxx11Ev	0000000000000000 .gcc_except_table._ZN4Grid6Logger10backgroundB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid6Logger10backgroundB5cxx11Ev	0000000000000000 GCC_except_table44
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4GridlsERSoRKNSt6chrono8durationIlSt5ratioILl1ELl1000000EEEE	0000000000000000 .text._ZN4GridlsERSoRKNSt6chrono8durationIlSt5ratioILl1ELl1000000EEEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid6Logger8evidenceB5cxx11Ev	0000000000000000 .text._ZN4Grid6Logger8evidenceB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid6Logger8evidenceB5cxx11Ev	0000000000000000 .gcc_except_table._ZN4Grid6Logger8evidenceB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid6Logger8evidenceB5cxx11Ev	0000000000000000 GCC_except_table46
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St4lessIS5_ESaISt4pairIKS5_S5_EEEixEOS5_	0000000000000000 .text._ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St4lessIS5_ESaISt4pairIKS5_S5_EEEixEOS5_
 | 
					 | 
				
			||||||
0000000000000100 l     O .rodata	0000000000000001 _ZStL19piecewise_construct
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_	0000000000000000 .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_	0000000000000000 .gcc_except_table._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_	0000000000000000 GCC_except_table49
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS8_ERS7_	0000000000000000 .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS8_ERS7_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS8_E	0000000000000000 .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS8_E
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE24_M_get_insert_unique_posERS7_	0000000000000000 .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE24_M_get_insert_unique_posERS7_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 GCC_except_table55
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 GCC_except_table56
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z	0000000000000000 .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv	0000000000000000 GCC_except_table63
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m	0000000000000000 GCC_except_table64
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorIS9_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorIS9_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid8GridBase21RankIndexToGlobalCoorEiiiRNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid8GridBase21RankIndexToGlobalCoorEiiiRNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev	0000000000000000 .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev	0000000000000000 GCC_except_table69
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_ii	0000000000000000 .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000060 l       .rodata.cst16	0000000000000000 .LCPI71_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iii	0000000000000000 .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii	0000000000000000 .text._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii	0000000000000000 .gcc_except_table._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii	0000000000000000 GCC_except_table72
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii	0000000000000000 .text._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii	0000000000000000 .gcc_except_table._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii	0000000000000000 GCC_except_table73
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev	0000000000000000 .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev	0000000000000000 GCC_except_table74
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt6vectorISt4pairIiiEN4Grid12uvmAllocatorIS1_EEE17_M_default_appendEm	0000000000000000 .text._ZNSt6vectorISt4pairIiiEN4Grid12uvmAllocatorIS1_EEE17_M_default_appendEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv	0000000000000000 .text._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv	0000000000000000 GCC_except_table76
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 .text._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 GCC_except_table77
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000110 l     F .text	000000000000104c .omp_outlined..41
 | 
					 | 
				
			||||||
0000000000000070 l       .rodata.cst16	0000000000000000 .LCPI80_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev	0000000000000000 .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev	0000000000000000 GCC_except_table81
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EE
 | 
					 | 
				
			||||||
0000000000001250 l     F .text	000000000000ad36 .omp_outlined..45
 | 
					 | 
				
			||||||
0000000000000080 l       .rodata.cst16	0000000000000000 .LCPI87_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .rodata	0000000000000000 .rodata
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_	0000000000000000 .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEELPSD_0EEEvRiRKT_	0000000000000000 .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEELPSD_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2ERKNS_18LatticeAcceleratorIS7_EE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2ERKNS_18LatticeAcceleratorIS7_EE
 | 
					 | 
				
			||||||
0000000000000090 l       .rodata.cst16	0000000000000000 .LCPI92_0
 | 
					 | 
				
			||||||
00000000000000a0 l       .rodata.cst16	0000000000000000 .LCPI92_1
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12LSComparisonINS_3vgeINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEjEESA_jEENS_7LatticeIS8_EET_RKNSC_IT0_EERKT1_	0000000000000000 .text._ZN4Grid12LSComparisonINS_3vgeINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEjEESA_jEENS_7LatticeIS8_EET_RKNSC_IT0_EERKT1_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12LSComparisonINS_3vgeINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEjEESA_jEENS_7LatticeIS8_EET_RKNSC_IT0_EERKT1_	0000000000000000 .gcc_except_table._ZN4Grid12LSComparisonINS_3vgeINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEjEESA_jEENS_7LatticeIS8_EET_RKNSC_IT0_EERKT1_
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12LSComparisonINS_3vgeINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEjEESA_jEENS_7LatticeIS8_EET_RKNSC_IT0_EERKT1_	0000000000000000 GCC_except_table92
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8allocateEmPKv	0000000000000000 GCC_except_table97
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE10deallocateEPS7_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE10deallocateEPS7_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE10deallocateEPS7_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE10deallocateEPS7_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE10deallocateEPS7_m	0000000000000000 GCC_except_table98
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2ERKNS_18LatticeAcceleratorIS7_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2ERKNS_18LatticeAcceleratorIS7_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
000000000000c080 l     F .text	000000000000046a .omp_outlined..53
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEELPS9_0EEEvRiRKT_	0000000000000000 .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEELPS9_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev	0000000000000000 .text._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev	0000000000000000 GCC_except_table104
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 GCC_except_table105
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12uvmAllocatorIiE10deallocateEPim	0000000000000000 .text._ZN4Grid12uvmAllocatorIiE10deallocateEPim
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12uvmAllocatorIiE10deallocateEPim	0000000000000000 .gcc_except_table._ZN4Grid12uvmAllocatorIiE10deallocateEPim
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12uvmAllocatorIiE10deallocateEPim	0000000000000000 GCC_except_table106
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m	0000000000000000 GCC_except_table107
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 .text._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 .gcc_except_table._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 GCC_except_table108
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 GCC_except_table109
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 .text._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 .gcc_except_table._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 GCC_except_table110
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 GCC_except_table111
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text.startup	0000000000000000 .text.startup
 | 
					 | 
				
			||||||
0000000000000000 l     F .text.startup	0000000000000001 __cxx_global_var_init.56
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 GCC_except_table116
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEaSINS_4ZeroEEERSE_RKT_	0000000000000000 .text._ZN4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEaSINS_4ZeroEEERSE_RKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
00000000000000b0 l       .rodata.cst16	0000000000000000 .LCPI123_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid3FFTC2EPNS_13GridCartesianE	0000000000000000 .text._ZN4Grid3FFTC2EPNS_13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid3FFTC2EPNS_13GridCartesianE	0000000000000000 .gcc_except_table._ZN4Grid3FFTC2EPNS_13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid3FFTC2EPNS_13GridCartesianE	0000000000000000 GCC_except_table123
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_4ZeroEEERSC_RKT_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_4ZeroEEERSC_RKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 GCC_except_table127
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE
 | 
					 | 
				
			||||||
000000000000c4f0 l     F .text	00000000000000e5 .omp_outlined..63
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE
 | 
					 | 
				
			||||||
000000000000c730 l     F .text	00000000000000e5 .omp_outlined..67
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE
 | 
					 | 
				
			||||||
000000000000cdd0 l     F .text	00000000000000e5 .omp_outlined..79
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev	0000000000000000 GCC_except_table131
 | 
					 | 
				
			||||||
00000000000000c0 l       .rodata.cst16	0000000000000000 .LCPI132_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian4InitERKNS_17AcceleratorVectorIiLi8EEES4_S4_	0000000000000000 .text._ZN4Grid13GridCartesian4InitERKNS_17AcceleratorVectorIiLi8EEES4_S4_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesianD0Ev	0000000000000000 .text._ZN4Grid13GridCartesianD0Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian14CheckerBoardedEi	0000000000000000 .text._ZN4Grid13GridCartesian14CheckerBoardedEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian12CheckerBoardERKNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid13GridCartesian12CheckerBoardERKNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian23CheckerBoardDestinationEiii	0000000000000000 .text._ZN4Grid13GridCartesian23CheckerBoardDestinationEiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian17CheckerBoardShiftEiiii	0000000000000000 .text._ZN4Grid13GridCartesian17CheckerBoardShiftEiiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian22CheckerBoardShiftForCBEiiii	0000000000000000 .text._ZN4Grid13GridCartesian22CheckerBoardShiftForCBEiiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian22CheckerBoardFromOindexEi	0000000000000000 .text._ZN4Grid13GridCartesian22CheckerBoardFromOindexEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian27CheckerBoardFromOindexTableEi	0000000000000000 .text._ZN4Grid13GridCartesian27CheckerBoardFromOindexTableEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid8GridBase6oIndexERNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid8GridBase6oIndexERNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid8GridBase6iIndexERNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid8GridBase6iIndexERNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv	0000000000000000 GCC_except_table145
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m	0000000000000000 GCC_except_table146
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev	0000000000000000 .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev	0000000000000000 GCC_except_table150
 | 
					 | 
				
			||||||
000000000000c5e0 l     F .text	000000000000014d .omp_outlined..64
 | 
					 | 
				
			||||||
0000000000000018 l       .rodata.cst8	0000000000000000 .LCPI152_0
 | 
					 | 
				
			||||||
00000000000000d0 l       .rodata.cst16	0000000000000000 .LCPI152_1
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE
 | 
					 | 
				
			||||||
000000000000c820 l     F .text	0000000000000346 .omp_outlined..68
 | 
					 | 
				
			||||||
0000000000000020 l       .rodata.cst8	0000000000000000 .LCPI155_0
 | 
					 | 
				
			||||||
0000000000000028 l       .rodata.cst8	0000000000000000 .LCPI155_1
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_	0000000000000000 .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi	0000000000000000 .text._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi	0000000000000000 .gcc_except_table._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi	0000000000000000 GCC_except_table157
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD_
 | 
					 | 
				
			||||||
000000000000cb70 l     F .text	00000000000000e5 .omp_outlined..71
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11conformableINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEESC_EEvRKNS_7LatticeIT_EERKNSD_IT0_EE	0000000000000000 .text._ZN4Grid11conformableINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEESC_EEvRKNS_7LatticeIT_EERKNSD_IT0_EE
 | 
					 | 
				
			||||||
000000000000cc60 l     F .text	0000000000000166 .omp_outlined..72
 | 
					 | 
				
			||||||
000000000000cec0 l     F .text	0000000000000462 .omp_outlined..80
 | 
					 | 
				
			||||||
00000000000000e0 l       .rodata.cst16	0000000000000000 .LCPI163_0
 | 
					 | 
				
			||||||
0000000000000010 l     F .text.startup	0000000000000021 _GLOBAL__sub_I_WilsonFermionInstantiationWilsonImplD.cc
 | 
					 | 
				
			||||||
0000000000000000 l     O .bss	0000000000000001 _ZStL8__ioinit
 | 
					 | 
				
			||||||
0000000000000040 l     F .text.startup	000000000000000a .omp_offloading.requires_reg
 | 
					 | 
				
			||||||
0000000000000000 l    d  .bss	0000000000000000 .bss
 | 
					 | 
				
			||||||
0000000000000001 l     O .bss	0000000000000001 _ZN5EigenL4lastE
 | 
					 | 
				
			||||||
0000000000000002 l     O .bss	0000000000000002 _ZN5EigenL6lastp1E
 | 
					 | 
				
			||||||
0000000000000004 l     O .bss	0000000000000001 _ZN5EigenL3fixILi1EEE
 | 
					 | 
				
			||||||
0000000000000005 l     O .bss	0000000000000001 _ZN5EigenL3allE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .rodata.str1.1	0000000000000000 .rodata.str1.1
 | 
					 | 
				
			||||||
0000000000000000 l     O .rodata.str1.16	0000000000000114 .omp_offloading.entry_name
 | 
					 | 
				
			||||||
0000000000000000 l    d  .rodata.str1.16	0000000000000000 .rodata.str1.16
 | 
					 | 
				
			||||||
0000000000000120 l     O .rodata.str1.16	0000000000000131 .omp_offloading.entry_name.84
 | 
					 | 
				
			||||||
0000000000000260 l     O .rodata.str1.16	0000000000000102 .omp_offloading.entry_name.85
 | 
					 | 
				
			||||||
0000000000000370 l     O .rodata.str1.16	00000000000000ee .omp_offloading.entry_name.86
 | 
					 | 
				
			||||||
0000000000000460 l     O .rodata.str1.16	000000000000013b .omp_offloading.entry_name.87
 | 
					 | 
				
			||||||
00000000000005a0 l     O .rodata.str1.16	000000000000009e .omp_offloading.entry_name.88
 | 
					 | 
				
			||||||
0000000000000640 l     O .rodata.str1.16	0000000000000130 .omp_offloading.entry_name.89
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4GridEv	0000000000000009 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4GridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_	000000000000008b _ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	000000000000100e _ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text.__clang_call_terminate	000000000000000b .hidden __clang_call_terminate
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _Unwind_Resume
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MdiagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000009 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MdiagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev	000000000000015b _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTVN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	00000000000001a8 _ZTVN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12uvmAllocatorIiE10deallocateEPim	000000000000100f _ZN4Grid12uvmAllocatorIiE10deallocateEPim
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m	000000000000100e _ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev	0000000000000256 _ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED0Ev	0000000000000012 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED0Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZdlPv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE12RedBlackGridEv	0000000000000009 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE12RedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MassEv	0000000000000009 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MassEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE7ConstEEEv	0000000000000006 _ZN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE7ConstEEEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11isTrivialEEEv	0000000000000006 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11isTrivialEEEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE3tmpEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE3tmpEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14DirichletBlockERKNS_17AcceleratorVectorIiLi8EEE	000000000000001a _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14DirichletBlockERKNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __assert_fail
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11FermionGridEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11FermionGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19FermionRedBlackGridEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19FermionRedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9GaugeGridEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9GaugeGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE17GaugeRedBlackGridEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE17GaugeRedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6MDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000c _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6MDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MoeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000c _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MoeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeoDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000c _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeoDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000e _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEaSINS_4ZeroEEERSE_RKT_	00000000000000e3 _ZN4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEaSINS_4ZeroEEERSE_RKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000e _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE	0000000000000e2f _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid3FFTC2EPNS_13GridCartesianE	000000000000024b _ZN4Grid3FFTC2EPNS_13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_4ZeroEEERSC_RKT_	00000000000000f7 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_4ZeroEEERSC_RKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi	000000000000020a _ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 acos
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EE	00000000000000ab _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE	00000000000003d2 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE	00000000000000ab _ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE	00000000000003d2 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi	00000000000000ce _ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _Znwm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 memmove
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE	00000000000003d2 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m	000000000000100f _ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt17__throw_bad_allocv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d	0000000000000222 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000508 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid15GridDefaultLattEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000320 _ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii	000000000000018c _ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSEOSD_	0000000000000168 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSEOSD_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	000000000000100e _ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i	0000000000000277 _ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE	00000000000000ab _ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE	0000000000000592 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12LSComparisonINS_3vgeINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEjEESA_jEENS_7LatticeIS8_EET_RKNSC_IT0_EERKT1_	0000000000000132 _ZN4Grid12LSComparisonINS_3vgeINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEjEESA_jEENS_7LatticeIS8_EET_RKNSC_IT0_EERKT1_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2ERKNS_18LatticeAcceleratorIS7_EE	00000000000000ab _ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2ERKNS_18LatticeAcceleratorIS7_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_12TrinaryWhereENS0_INS1_INS3_IjNS7_IjEEEEEEEESD_SD_EERSD_RKNS_24LatticeTrinaryExpressionIT_T0_T1_T2_EE	00000000000004ab _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_12TrinaryWhereENS0_INS1_INS3_IjNS7_IjEEEEEEEESD_SD_EERSD_RKNS_24LatticeTrinaryExpressionIT_T0_T1_T2_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE10deallocateEPS7_m	000000000000100f _ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE10deallocateEPS7_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m	000000000000100f _ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt4cout
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid12GridLogErrorE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4GridlsERSoRNS_6LoggerE	0000000000000348 _ZN4GridlsERSoRNS_6LoggerE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 exit
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE	000000000000000e _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE	000000000000000e _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DminusERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DminusERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD_	0000000000000266 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DminusDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DminusDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ImportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ImportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23ImportUnphysicalFermionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23ImportUnphysicalFermionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE29ExportPhysicalFermionSolutionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE29ExportPhysicalFermionSolutionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ExportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ExportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid6Logger10backgroundB5cxx11Ev	00000000000000e1 _ZN4Grid6Logger10backgroundB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid6Logger9timestampE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt6chrono3_V212system_clock3nowEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid6Logger8evidenceB5cxx11Ev	00000000000000e1 _ZN4Grid6Logger8evidenceB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4GridlsERSoRKNSt6chrono8durationIlSt5ratioILl1ELl1000000EEEE	00000000000001b5 _ZN4GridlsERSoRKNSt6chrono8durationIlSt5ratioILl1ELl1000000EEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid6Logger7devnullE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSolsEi
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNKSt5ctypeIcE13_M_widen_initEv
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo3putEc
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo5flushEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt16__throw_bad_castv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 memset
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager9ViewCloseEPvNS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid21CartesianCommunicator8ThisRankEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid8GridBase21RankIndexToGlobalCoorEiiiRNS_17AcceleratorVectorIiLi8EEE	000000000000027d _ZN4Grid8GridBase21RankIndexToGlobalCoorEiiiRNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev	000000000000001a _ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_ii	00000000000001b4 _ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid18acceleratorThreadsEv
 | 
					 | 
				
			||||||
0000000000000150  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118af7__ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i_l116.region_id
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __tgt_target_kernel
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_global_thread_num
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_push_num_teams
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_fork_teams
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev	000000000000001a _ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev	000000000000001a _ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_	0000000000000164 _ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEELPSD_0EEEvRiRKT_	0000000000000164 _ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEELPSD_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 memcpy
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE	0000000000000225 _ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8ViewOpenENS_8ViewModeE	000000000000021d _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
00000000000001c0  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE_l166.region_id
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEELPS9_0EEEvRiRKT_	0000000000000164 _ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEELPS9_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8ViewOpenENS_8ViewModeE	000000000000021d _ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
00000000000001e8  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_12TrinaryWhereENS0_INS1_INS3_IjNS7_IjEEEEEEEESD_SD_EERSD_RKNS_24LatticeTrinaryExpressionIT_T0_T1_T2_EE_l190.region_id
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St4lessIS5_ESaISt4pairIKS5_S5_EEEixEOS5_	000000000000013d _ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St4lessIS5_ESaISt4pairIKS5_S5_EEEixEOS5_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo9_M_insertIlEERSoT_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_begin_catch
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt9terminatev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 memcmp
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_	000000000000017e _ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS8_ERS7_	00000000000002b4 _ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS8_ERS7_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS8_E	0000000000000031 _ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS8_E
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_rethrow
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_end_catch
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt18_Rb_tree_incrementPSt18_Rb_tree_node_base
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE24_M_get_insert_unique_posERS7_	000000000000012f _ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE24_M_get_insert_unique_posERS7_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	000000000000103f _ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid14MemoryProfiler5statsE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid14MemoryProfiler5debugE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid12GridLogDebugE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 vsnprintf
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z	0000000000000104 _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid10sizeStringB5cxx11Em
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo9_M_insertIPKvEERSoT_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager11CpuAllocateEm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt20__throw_length_errorPKc
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager7CpuFreeEPvm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo9_M_insertImEERSoT_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager8ViewOpenEPvmNS_8ViewModeENS_10ViewAdviseE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv	000000000000103e _ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorIS9_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorIS9_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8ViewOpenENS_8ViewModeE	000000000000021d _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid21CartesianCommunicator21ProcessorCoorFromRankEiRNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iii	000000000000051b _ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii	000000000000059a _ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii	0000000000000b8a _ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid12Cshift_tableE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt6vectorISt4pairIiiEN4Grid12uvmAllocatorIS1_EEE17_M_default_appendEm	00000000000001d0 _ZNSt6vectorISt4pairIiiEN4Grid12uvmAllocatorIS1_EEE17_M_default_appendEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv	000000000000103e _ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m	000000000000100f _ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager14SharedAllocateEm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager10SharedFreeEPvm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_for_static_init_8u
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_fork_call
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_for_static_fini
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE8ViewOpenENS_8ViewModeE	0000000000000225 _ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2ERKNS_18LatticeAcceleratorIS7_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEC2ERKNS_18LatticeAcceleratorIS7_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8allocateEmPKv	000000000000103e _ZN4Grid16alignedAllocatorINS_7iScalarINS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m	000000000000100e _ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m	000000000000100e _ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m	000000000000100f _ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager15AcceleratorFreeEPvm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	000000000000103f _ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE	0000000000000225 _ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE8ViewOpenENS_8ViewModeE	0000000000000225 _ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTVN4Grid13GridCartesianE	0000000000000068 _ZTVN4Grid13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid21CartesianCommunicatorC2ERKNS_17AcceleratorVectorIiLi8EEERKS0_Ri
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian4InitERKNS_17AcceleratorVectorIiLi8EEES4_S4_	0000000000000451 _ZN4Grid13GridCartesian4InitERKNS_17AcceleratorVectorIiLi8EEES4_S4_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev	000000000000001a _ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
00000000000001e9  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE_l166.region_id
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_	0000000000000164 _ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_
 | 
					 | 
				
			||||||
00000000000001ea  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE_l166.region_id
 | 
					 | 
				
			||||||
0000000000000228  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE_l166.region_id
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesianD0Ev	0000000000000012 _ZN4Grid13GridCartesianD0Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid21CartesianCommunicatorD2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian14CheckerBoardedEi	0000000000000003 _ZN4Grid13GridCartesian14CheckerBoardedEi
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian12CheckerBoardERKNS_17AcceleratorVectorIiLi8EEE	0000000000000003 _ZN4Grid13GridCartesian12CheckerBoardERKNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian23CheckerBoardDestinationEiii	0000000000000003 _ZN4Grid13GridCartesian23CheckerBoardDestinationEiii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian17CheckerBoardShiftEiiii	0000000000000003 _ZN4Grid13GridCartesian17CheckerBoardShiftEiiii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian22CheckerBoardShiftForCBEiiii	0000000000000003 _ZN4Grid13GridCartesian22CheckerBoardShiftForCBEiiii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian22CheckerBoardFromOindexEi	0000000000000003 _ZN4Grid13GridCartesian22CheckerBoardFromOindexEi
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian27CheckerBoardFromOindexTableEi	0000000000000003 _ZN4Grid13GridCartesian27CheckerBoardFromOindexTableEi
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid8GridBase6oIndexERNS_17AcceleratorVectorIiLi8EEE	0000000000000096 _ZN4Grid8GridBase6oIndexERNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid8GridBase6iIndexERNS_17AcceleratorVectorIiLi8EEE	0000000000000096 _ZN4Grid8GridBase6iIndexERNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv	000000000000103e _ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 cexp
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11conformableINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEESC_EEvRKNS_7LatticeIT_EERKNSD_IT0_EE	0000000000000174 _ZN4Grid11conformableINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEESC_EEvRKNS_7LatticeIT_EERKNSD_IT0_EE
 | 
					 | 
				
			||||||
0000000000000208  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD__l331.region_id
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt8ios_base4InitC1Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt8ios_base4InitD1Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 .hidden __dso_handle
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_atexit
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __tgt_register_requires
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000038 _ZTIN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE1MERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MdagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MdirERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_ii
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE7MdirAllERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSt6vectorISK_SaISK_EE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MeooeERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MooeeERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooeeInvERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeooeDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooeeDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11MooeeInvDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4DhopERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DhopOEERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DhopEOERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE7DhopDirERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_ii
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DhopDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11DhopDerivEOERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11DhopDerivOEERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23MomentumSpacePropagatorERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERKSK_dSt6vectorIdSaIdEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ImportGaugeERKNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE24ContractConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_SL_NS_7CurrentEj
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000089 _ZTSN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid13WilsonKernelsINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000089 _ZTSN4Grid13WilsonKernelsINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	000000000000008b _ZTSN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE	0000000000000099 _ZTSN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE	000000000000008b _ZTSN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE	0000000000000010 _ZTIN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZTVN10__cxxabiv117__class_type_infoE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE	0000000000000018 _ZTIN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZTVN10__cxxabiv120__si_class_type_infoE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEE	0000000000000074 _ZTSN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid17PeriodicGaugeImplINS_14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEEEE	0000000000000072 _ZTSN4Grid17PeriodicGaugeImplINS_14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEE	0000000000000059 _ZTSN4Grid14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEE	0000000000000010 _ZTIN4Grid14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid17PeriodicGaugeImplINS_14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEEEE	0000000000000018 _ZTIN4Grid17PeriodicGaugeImplINS_14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEE	0000000000000018 _ZTIN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000038 _ZTIN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZTVN10__cxxabiv121__vmi_class_type_infoE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid19WilsonKernelsStaticE	000000000000001d _ZTSN4Grid19WilsonKernelsStaticE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid19WilsonKernelsStaticE	0000000000000010 _ZTIN4Grid19WilsonKernelsStaticE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid13WilsonKernelsINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000038 _ZTIN4Grid13WilsonKernelsINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid19WilsonFermionStaticE	000000000000001d _ZTSN4Grid19WilsonFermionStaticE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid19WilsonFermionStaticE	0000000000000010 _ZTIN4Grid19WilsonFermionStaticE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid13GridCartesianE	0000000000000018 _ZTIN4Grid13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid13GridCartesianE	0000000000000017 _ZTSN4Grid13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid8GridBaseE	0000000000000011 _ZTSN4Grid8GridBaseE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid10GridThreadE	0000000000000014 _ZTSN4Grid10GridThreadE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid10GridThreadE	0000000000000010 _ZTIN4Grid10GridThreadE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid8GridBaseE	0000000000000038 _ZTIN4Grid8GridBaseE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZTIN4Grid21CartesianCommunicatorE
 | 
					 | 
				
			||||||
0000000000000000  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118af7__ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i_l116
 | 
					 | 
				
			||||||
0000000000000020  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE_l166
 | 
					 | 
				
			||||||
0000000000000040  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_12TrinaryWhereENS0_INS1_INS3_IjNS7_IjEEEEEEEESD_SD_EERSD_RKNS_24LatticeTrinaryExpressionIT_T0_T1_T2_EE_l190
 | 
					 | 
				
			||||||
0000000000000060  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE_l166
 | 
					 | 
				
			||||||
0000000000000080  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE_l166
 | 
					 | 
				
			||||||
00000000000000a0  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD__l331
 | 
					 | 
				
			||||||
00000000000000c0  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE_l166
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __gxx_personality_v0
 | 
					 | 
				
			||||||
@@ -1,16 +0,0 @@
 | 
				
			|||||||
 | 
					 | 
				
			||||||
libWilsonFermionWorks1.a(WilsonFermionWorks1.o):	file format elf64-x86-64
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SYMBOL TABLE:
 | 
					 | 
				
			||||||
0000000000000000 l    df *ABS*	0000000000000000 WilsonFermionWorks1.cc
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text.startup	0000000000000000 .text.startup
 | 
					 | 
				
			||||||
0000000000000000 l     F .text.startup	0000000000000021 _GLOBAL__sub_I_WilsonFermionWorks1.cc
 | 
					 | 
				
			||||||
0000000000000000 l     O .bss	0000000000000001 _ZStL8__ioinit
 | 
					 | 
				
			||||||
0000000000000000 l    d  .bss	0000000000000000 .bss
 | 
					 | 
				
			||||||
0000000000000001 l     O .bss	0000000000000001 _ZN5EigenL4lastE
 | 
					 | 
				
			||||||
0000000000000002 l     O .bss	0000000000000002 _ZN5EigenL6lastp1E
 | 
					 | 
				
			||||||
0000000000000004 l     O .bss	0000000000000001 _ZN5EigenL3allE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt8ios_base4InitC1Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt8ios_base4InitD1Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 .hidden __dso_handle
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_atexit
 | 
					 | 
				
			||||||
@@ -1,545 +0,0 @@
 | 
				
			|||||||
 | 
					 | 
				
			||||||
libWilsonFermionWorks2.a(WilsonFermionWorks2.o):	file format elf64-x86-64
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SYMBOL TABLE:
 | 
					 | 
				
			||||||
0000000000000000 l    df *ABS*	0000000000000000 WilsonFermionWorks2.cc
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text	0000000000000000 .text
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4GridEv	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4GridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_	0000000000000000 .text._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_	0000000000000000 .gcc_except_table._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_	0000000000000000 GCC_except_table1
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MdiagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MdiagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev	0000000000000000 GCC_except_table3
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED0Ev	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED0Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE12RedBlackGridEv	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE12RedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MassEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MassEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE7ConstEEEv	0000000000000000 .text._ZN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE7ConstEEEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11isTrivialEEEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11isTrivialEEEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE3tmpEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE3tmpEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14DirichletBlockERKNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14DirichletBlockERKNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11FermionGridEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11FermionGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19FermionRedBlackGridEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19FermionRedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9GaugeGridEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9GaugeGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE17GaugeRedBlackGridEv	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE17GaugeRedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6MDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6MDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MoeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MoeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeoDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeoDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000 l       .rodata.cst8	0000000000000000 .LCPI20_0
 | 
					 | 
				
			||||||
0000000000000008 l       .rodata.cst8	0000000000000000 .LCPI20_1
 | 
					 | 
				
			||||||
0000000000000000 l       .rodata.cst16	0000000000000000 .LCPI20_2
 | 
					 | 
				
			||||||
0000000000000010 l       .rodata.cst16	0000000000000000 .LCPI20_3
 | 
					 | 
				
			||||||
0000000000000020 l       .rodata.cst16	0000000000000000 .LCPI20_4
 | 
					 | 
				
			||||||
0000000000000030 l       .rodata.cst16	0000000000000000 .LCPI20_5
 | 
					 | 
				
			||||||
0000000000000040 l       .rodata.cst16	0000000000000000 .LCPI20_6
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE	0000000000000000 .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE	0000000000000000 GCC_except_table20
 | 
					 | 
				
			||||||
0000000000000010 l       .rodata.cst8	0000000000000000 .LCPI21_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d	0000000000000000 .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d	0000000000000000 GCC_except_table21
 | 
					 | 
				
			||||||
0000000000000050 l       .rodata.cst16	0000000000000000 .LCPI22_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 GCC_except_table22
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DminusERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DminusERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DminusDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DminusDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ImportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ImportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23ImportUnphysicalFermionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23ImportUnphysicalFermionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE29ExportPhysicalFermionSolutionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE29ExportPhysicalFermionSolutionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ExportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000000 .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ExportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4GridlsERSoRNS_6LoggerE	0000000000000000 .text._ZN4GridlsERSoRNS_6LoggerE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4GridlsERSoRNS_6LoggerE	0000000000000000 .gcc_except_table._ZN4GridlsERSoRNS_6LoggerE
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4GridlsERSoRNS_6LoggerE	0000000000000000 GCC_except_table31
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 GCC_except_table35
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii	0000000000000000 .text._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii	0000000000000000 .gcc_except_table._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii	0000000000000000 GCC_except_table36
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSEOSD_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSEOSD_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 GCC_except_table38
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i	0000000000000000 .text._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i
 | 
					 | 
				
			||||||
0000000000000000 l     F .text	000000000000010d .omp_outlined.
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i	0000000000000000 .gcc_except_table._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i	0000000000000000 GCC_except_table39
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000001160 l     F .text	00000000000000e5 .omp_outlined..44
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev	0000000000000000 GCC_except_table41
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid6Logger10backgroundB5cxx11Ev	0000000000000000 .text._ZN4Grid6Logger10backgroundB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid6Logger10backgroundB5cxx11Ev	0000000000000000 .gcc_except_table._ZN4Grid6Logger10backgroundB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid6Logger10backgroundB5cxx11Ev	0000000000000000 GCC_except_table42
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4GridlsERSoRKNSt6chrono8durationIlSt5ratioILl1ELl1000000EEEE	0000000000000000 .text._ZN4GridlsERSoRKNSt6chrono8durationIlSt5ratioILl1ELl1000000EEEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid6Logger8evidenceB5cxx11Ev	0000000000000000 .text._ZN4Grid6Logger8evidenceB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid6Logger8evidenceB5cxx11Ev	0000000000000000 .gcc_except_table._ZN4Grid6Logger8evidenceB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid6Logger8evidenceB5cxx11Ev	0000000000000000 GCC_except_table44
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St4lessIS5_ESaISt4pairIKS5_S5_EEEixEOS5_	0000000000000000 .text._ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St4lessIS5_ESaISt4pairIKS5_S5_EEEixEOS5_
 | 
					 | 
				
			||||||
0000000000000100 l     O .rodata	0000000000000001 _ZStL19piecewise_construct
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_	0000000000000000 .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_	0000000000000000 .gcc_except_table._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_	0000000000000000 GCC_except_table47
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS8_ERS7_	0000000000000000 .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS8_ERS7_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS8_E	0000000000000000 .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS8_E
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE24_M_get_insert_unique_posERS7_	0000000000000000 .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE24_M_get_insert_unique_posERS7_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 GCC_except_table53
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 GCC_except_table54
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z	0000000000000000 .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv	0000000000000000 GCC_except_table61
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m	0000000000000000 GCC_except_table62
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorIS9_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorIS9_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid8GridBase21RankIndexToGlobalCoorEiiiRNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid8GridBase21RankIndexToGlobalCoorEiiiRNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev	0000000000000000 .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev	0000000000000000 GCC_except_table67
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_ii	0000000000000000 .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000060 l       .rodata.cst16	0000000000000000 .LCPI69_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iii	0000000000000000 .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii	0000000000000000 .text._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii	0000000000000000 .gcc_except_table._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii	0000000000000000 GCC_except_table70
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii	0000000000000000 .text._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii	0000000000000000 .gcc_except_table._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii	0000000000000000 GCC_except_table71
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev	0000000000000000 .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev	0000000000000000 GCC_except_table72
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNSt6vectorISt4pairIiiEN4Grid12uvmAllocatorIS1_EEE17_M_default_appendEm	0000000000000000 .text._ZNSt6vectorISt4pairIiiEN4Grid12uvmAllocatorIS1_EEE17_M_default_appendEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv	0000000000000000 .text._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv	0000000000000000 GCC_except_table74
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 .text._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 GCC_except_table75
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000110 l     F .text	000000000000104c .omp_outlined..41
 | 
					 | 
				
			||||||
0000000000000070 l       .rodata.cst16	0000000000000000 .LCPI78_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev	0000000000000000 .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev	0000000000000000 GCC_except_table79
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EE
 | 
					 | 
				
			||||||
0000000000001250 l     F .text	000000000000ad36 .omp_outlined..45
 | 
					 | 
				
			||||||
0000000000000080 l       .rodata.cst16	0000000000000000 .LCPI85_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .rodata	0000000000000000 .rodata
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_	0000000000000000 .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEELPSD_0EEEvRiRKT_	0000000000000000 .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEELPSD_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev	0000000000000000 .text._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev	0000000000000000 GCC_except_table89
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000000 GCC_except_table90
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12uvmAllocatorIiE10deallocateEPim	0000000000000000 .text._ZN4Grid12uvmAllocatorIiE10deallocateEPim
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12uvmAllocatorIiE10deallocateEPim	0000000000000000 .gcc_except_table._ZN4Grid12uvmAllocatorIiE10deallocateEPim
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12uvmAllocatorIiE10deallocateEPim	0000000000000000 GCC_except_table91
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m	0000000000000000 GCC_except_table92
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 .text._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 .gcc_except_table._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 GCC_except_table93
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m	0000000000000000 GCC_except_table94
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 .text._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 .gcc_except_table._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m	0000000000000000 GCC_except_table95
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	0000000000000000 GCC_except_table96
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text.startup	0000000000000000 .text.startup
 | 
					 | 
				
			||||||
0000000000000000 l     F .text.startup	0000000000000001 __cxx_global_var_init.52
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	0000000000000000 GCC_except_table101
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEaSINS_4ZeroEEERSE_RKT_	0000000000000000 .text._ZN4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEaSINS_4ZeroEEERSE_RKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE8ViewOpenENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000090 l       .rodata.cst16	0000000000000000 .LCPI108_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid3FFTC2EPNS_13GridCartesianE	0000000000000000 .text._ZN4Grid3FFTC2EPNS_13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid3FFTC2EPNS_13GridCartesianE	0000000000000000 .gcc_except_table._ZN4Grid3FFTC2EPNS_13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid3FFTC2EPNS_13GridCartesianE	0000000000000000 GCC_except_table108
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_4ZeroEEERSC_RKT_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_4ZeroEEERSC_RKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000000 GCC_except_table112
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE
 | 
					 | 
				
			||||||
000000000000bf90 l     F .text	00000000000000e5 .omp_outlined..59
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE
 | 
					 | 
				
			||||||
000000000000c1d0 l     F .text	00000000000000e5 .omp_outlined..63
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE
 | 
					 | 
				
			||||||
000000000000c870 l     F .text	00000000000000e5 .omp_outlined..75
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev	0000000000000000 GCC_except_table116
 | 
					 | 
				
			||||||
00000000000000a0 l       .rodata.cst16	0000000000000000 .LCPI117_0
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian4InitERKNS_17AcceleratorVectorIiLi8EEES4_S4_	0000000000000000 .text._ZN4Grid13GridCartesian4InitERKNS_17AcceleratorVectorIiLi8EEES4_S4_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesianD0Ev	0000000000000000 .text._ZN4Grid13GridCartesianD0Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian14CheckerBoardedEi	0000000000000000 .text._ZN4Grid13GridCartesian14CheckerBoardedEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian12CheckerBoardERKNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid13GridCartesian12CheckerBoardERKNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian23CheckerBoardDestinationEiii	0000000000000000 .text._ZN4Grid13GridCartesian23CheckerBoardDestinationEiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian17CheckerBoardShiftEiiii	0000000000000000 .text._ZN4Grid13GridCartesian17CheckerBoardShiftEiiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian22CheckerBoardShiftForCBEiiii	0000000000000000 .text._ZN4Grid13GridCartesian22CheckerBoardShiftForCBEiiii
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian22CheckerBoardFromOindexEi	0000000000000000 .text._ZN4Grid13GridCartesian22CheckerBoardFromOindexEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid13GridCartesian27CheckerBoardFromOindexTableEi	0000000000000000 .text._ZN4Grid13GridCartesian27CheckerBoardFromOindexTableEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid8GridBase6oIndexERNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid8GridBase6oIndexERNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid8GridBase6iIndexERNS_17AcceleratorVectorIiLi8EEE	0000000000000000 .text._ZN4Grid8GridBase6iIndexERNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE6resizeEm	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE11SetViewModeENS_8ViewModeE	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv	0000000000000000 GCC_except_table130
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m	0000000000000000 .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m	0000000000000000 .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m	0000000000000000 GCC_except_table131
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EENS_8ViewModeE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZNK4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE	0000000000000000 .text._ZNK4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev	0000000000000000 .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev	0000000000000000 .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev	0000000000000000 GCC_except_table135
 | 
					 | 
				
			||||||
000000000000c080 l     F .text	000000000000014d .omp_outlined..60
 | 
					 | 
				
			||||||
0000000000000018 l       .rodata.cst8	0000000000000000 .LCPI137_0
 | 
					 | 
				
			||||||
00000000000000b0 l       .rodata.cst16	0000000000000000 .LCPI137_1
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE	0000000000000000 .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE
 | 
					 | 
				
			||||||
000000000000c2c0 l     F .text	0000000000000346 .omp_outlined..64
 | 
					 | 
				
			||||||
0000000000000020 l       .rodata.cst8	0000000000000000 .LCPI140_0
 | 
					 | 
				
			||||||
0000000000000028 l       .rodata.cst8	0000000000000000 .LCPI140_1
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_	0000000000000000 .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi	0000000000000000 .text._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi
 | 
					 | 
				
			||||||
0000000000000000 l    d  .gcc_except_table._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi	0000000000000000 .gcc_except_table._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi
 | 
					 | 
				
			||||||
0000000000000000 l       .gcc_except_table._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi	0000000000000000 GCC_except_table142
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD_	0000000000000000 .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD_
 | 
					 | 
				
			||||||
000000000000c610 l     F .text	00000000000000e5 .omp_outlined..67
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text._ZN4Grid11conformableINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEESC_EEvRKNS_7LatticeIT_EERKNSD_IT0_EE	0000000000000000 .text._ZN4Grid11conformableINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEESC_EEvRKNS_7LatticeIT_EERKNSD_IT0_EE
 | 
					 | 
				
			||||||
000000000000c700 l     F .text	0000000000000166 .omp_outlined..68
 | 
					 | 
				
			||||||
000000000000c960 l     F .text	0000000000000462 .omp_outlined..76
 | 
					 | 
				
			||||||
00000000000000c0 l       .rodata.cst16	0000000000000000 .LCPI148_0
 | 
					 | 
				
			||||||
0000000000000010 l     F .text.startup	0000000000000021 _GLOBAL__sub_I_WilsonFermionWorks2.cc
 | 
					 | 
				
			||||||
0000000000000000 l     O .bss	0000000000000001 _ZStL8__ioinit
 | 
					 | 
				
			||||||
0000000000000040 l     F .text.startup	000000000000000a .omp_offloading.requires_reg
 | 
					 | 
				
			||||||
0000000000000000 l    d  .bss	0000000000000000 .bss
 | 
					 | 
				
			||||||
0000000000000001 l     O .bss	0000000000000001 _ZN5EigenL4lastE
 | 
					 | 
				
			||||||
0000000000000002 l     O .bss	0000000000000002 _ZN5EigenL6lastp1E
 | 
					 | 
				
			||||||
0000000000000004 l     O .bss	0000000000000001 _ZN5EigenL3fixILi1EEE
 | 
					 | 
				
			||||||
0000000000000005 l     O .bss	0000000000000001 _ZN5EigenL3allE
 | 
					 | 
				
			||||||
0000000000000000 l    d  .rodata.str1.1	0000000000000000 .rodata.str1.1
 | 
					 | 
				
			||||||
0000000000000000 l     O .rodata.str1.16	0000000000000114 .omp_offloading.entry_name
 | 
					 | 
				
			||||||
0000000000000000 l    d  .rodata.str1.16	0000000000000000 .rodata.str1.16
 | 
					 | 
				
			||||||
0000000000000120 l     O .rodata.str1.16	0000000000000131 .omp_offloading.entry_name.80
 | 
					 | 
				
			||||||
0000000000000260 l     O .rodata.str1.16	00000000000000ee .omp_offloading.entry_name.81
 | 
					 | 
				
			||||||
0000000000000350 l     O .rodata.str1.16	000000000000013b .omp_offloading.entry_name.82
 | 
					 | 
				
			||||||
0000000000000490 l     O .rodata.str1.16	000000000000009e .omp_offloading.entry_name.83
 | 
					 | 
				
			||||||
0000000000000530 l     O .rodata.str1.16	0000000000000130 .omp_offloading.entry_name.84
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4GridEv	0000000000000009 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4GridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_	000000000000008b _ZN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE5MdagMERKSE_RSE_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	000000000000100e _ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text.__clang_call_terminate	000000000000000b .hidden __clang_call_terminate
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _Unwind_Resume
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MdiagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000009 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MdiagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev	000000000000015b _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTVN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	00000000000001a8 _ZTVN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12uvmAllocatorIiE10deallocateEPim	000000000000100f _ZN4Grid12uvmAllocatorIiE10deallocateEPim
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m	000000000000100e _ZN4Grid16alignedAllocatorINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE10deallocateEPSD_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev	0000000000000256 _ZN4Grid16CartesianStencilINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEENS1_INS2_ISA_Li2EEEEENS_16WilsonImplParamsEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED0Ev	0000000000000012 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEED0Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZdlPv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE12RedBlackGridEv	0000000000000009 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE12RedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MassEv	0000000000000009 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MassEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE7ConstEEEv	0000000000000006 _ZN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEE7ConstEEEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11isTrivialEEEv	0000000000000006 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11isTrivialEEEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE3tmpEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE3tmpEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14DirichletBlockERKNS_17AcceleratorVectorIiLi8EEE	000000000000001a _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14DirichletBlockERKNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __assert_fail
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11FermionGridEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11FermionGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19FermionRedBlackGridEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19FermionRedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9GaugeGridEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9GaugeGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE17GaugeRedBlackGridEv	0000000000000008 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE17GaugeRedBlackGridEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6MDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000c _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6MDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MoeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000c _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MoeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeoDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000c _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeoDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000e _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEaSINS_4ZeroEEERSE_RKT_	00000000000000e3 _ZN4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEaSINS_4ZeroEEERSE_RKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i	000000000000000e _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeeDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE	0000000000000e2f _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_dSt6vectorIS4_SaIS4_EESO_IdSaIdEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid3FFTC2EPNS_13GridCartesianE	000000000000024b _ZN4Grid3FFTC2EPNS_13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_4ZeroEEERSC_RKT_	00000000000000f7 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_4ZeroEEERSC_RKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi	000000000000020a _ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 acos
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EE	00000000000000ab _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE	00000000000003d2 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE	00000000000000ab _ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE	00000000000003d2 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi	00000000000000ce _ZN4Grid3FFT12FFT_dim_maskINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSG_NS_17AcceleratorVectorIiLi8EEEi
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _Znwm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 memmove
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE	00000000000003d2 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m	000000000000100f _ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE10deallocateEPSB_m
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt17__throw_bad_allocv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d	0000000000000222 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE14FreePropagatorERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_d
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE	0000000000000466 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE19SeqConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_NS_7CurrentEjjjRNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid15GridDefaultLattEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_4ZeroEEERSD_RKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE	000000000000021c _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2EPNS_8GridBaseENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi	0000000000000320 _ZN4Grid17LatticeCoordinateINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEvRNS_7LatticeIT_EEi
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii	000000000000018c _ZN4Grid6CshiftINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEENS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSEOSD_	0000000000000168 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSEOSD_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m	000000000000100e _ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE10deallocateEPSC_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i	0000000000000277 _ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE	00000000000000ab _ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE	0000000000000592 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m	000000000000100f _ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE10deallocateEPS9_m
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt4cout
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid12GridLogErrorE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4GridlsERSoRNS_6LoggerE	0000000000000348 _ZN4GridlsERSoRNS_6LoggerE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 exit
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev	0000000000000037 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE	000000000000000e _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE	000000000000000e _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ContractJ5qERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEERNSE_INSF_INSF_INSF_IS8_EEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DminusERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DminusERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD_	0000000000000266 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DminusDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DminusDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ImportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ImportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23ImportUnphysicalFermionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23ImportUnphysicalFermionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE29ExportPhysicalFermionSolutionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE29ExportPhysicalFermionSolutionERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ExportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_	0000000000000008 _ZN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE27ExportPhysicalFermionSourceERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid6Logger10backgroundB5cxx11Ev	00000000000000e1 _ZN4Grid6Logger10backgroundB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid6Logger9timestampE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt6chrono3_V212system_clock3nowEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid6Logger8evidenceB5cxx11Ev	00000000000000e1 _ZN4Grid6Logger8evidenceB5cxx11Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4GridlsERSoRKNSt6chrono8durationIlSt5ratioILl1ELl1000000EEEE	00000000000001b5 _ZN4GridlsERSoRKNSt6chrono8durationIlSt5ratioILl1ELl1000000EEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid6Logger7devnullE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSolsEi
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNKSt5ctypeIcE13_M_widen_initEv
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo3putEc
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo5flushEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt16__throw_bad_castv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 memset
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager9ViewCloseEPvNS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid21CartesianCommunicator8ThisRankEv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid8GridBase21RankIndexToGlobalCoorEiiiRNS_17AcceleratorVectorIiLi8EEE	000000000000027d _ZN4Grid8GridBase21RankIndexToGlobalCoorEiiiRNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev	000000000000001a _ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_ii	00000000000001b4 _ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_ii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid18acceleratorThreadsEv
 | 
					 | 
				
			||||||
0000000000000150  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118af7__ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i_l116.region_id
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __tgt_target_kernel
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_global_thread_num
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_push_num_teams
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_fork_teams
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev	000000000000001a _ZN4Grid10ViewCloserINS_11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev	000000000000001a _ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEED2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_	0000000000000164 _ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iMatrixINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEELPSD_0EEEvRiRKT_	0000000000000164 _ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEELPSD_0EEEvRiRKT_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 memcpy
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE	0000000000000225 _ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8ViewOpenENS_8ViewModeE	000000000000021d _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
00000000000001c0  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE_l166.region_id
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St4lessIS5_ESaISt4pairIKS5_S5_EEEixEOS5_	000000000000013d _ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES5_St4lessIS5_ESaISt4pairIKS5_S5_EEEixEOS5_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo9_M_insertIlEERSoT_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_begin_catch
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt9terminatev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 memcmp
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_	000000000000017e _ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE22_M_emplace_hint_uniqueIJRKSt21piecewise_construct_tSt5tupleIJOS5_EESJ_IJEEEEESt17_Rb_tree_iteratorIS8_ESt23_Rb_tree_const_iteratorIS8_EDpOT_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS8_ERS7_	00000000000002b4 _ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS8_ERS7_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS8_E	0000000000000031 _ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS8_E
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_rethrow
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_end_catch
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt18_Rb_tree_incrementPSt18_Rb_tree_node_base
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE24_M_get_insert_unique_posERS7_	000000000000012f _ZNSt8_Rb_treeINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt4pairIKS5_S5_ESt10_Select1stIS8_ESt4lessIS5_ESaIS8_EE24_M_get_insert_unique_posERS7_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	000000000000103f _ZN4Grid16alignedAllocatorINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid14MemoryProfiler5statsE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid14MemoryProfiler5debugE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid12GridLogDebugE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 vsnprintf
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z	0000000000000104 _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid10sizeStringB5cxx11Em
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo9_M_insertIPKvEERSoT_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager11CpuAllocateEm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZSt20__throw_length_errorPKc
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager7CpuFreeEPvm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSo9_M_insertImEERSoT_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager8ViewOpenEPvmNS_8ViewModeENS_10ViewAdviseE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv	000000000000103e _ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorIS9_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorIS9_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8ViewOpenENS_8ViewModeE	000000000000021d _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdIjNS_12Optimization3vecIjEEEEEEEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid21CartesianCommunicator21ProcessorCoorFromRankEiRNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iii	000000000000051b _ZN4Grid12Cshift_localINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii	000000000000059a _ZN4Grid10Copy_planeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii	0000000000000b8a _ZN4Grid18Copy_plane_permuteINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEvRNS_7LatticeIT_EERKSF_iiiii
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid12Cshift_tableE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNSt6vectorISt4pairIiiEN4Grid12uvmAllocatorIS1_EEE17_M_default_appendEm	00000000000001d0 _ZNSt6vectorISt4pairIiiEN4Grid12uvmAllocatorIS1_EEE17_M_default_appendEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv	000000000000103e _ZN4Grid12uvmAllocatorISt4pairIiiEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m	000000000000100f _ZN4Grid12uvmAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager14SharedAllocateEm
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager10SharedFreeEPvm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_for_static_init_8u
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_fork_call
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __kmpc_for_static_fini
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE8ViewOpenENS_8ViewModeE	0000000000000225 _ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi8EEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m	000000000000100e _ZN4Grid12devAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m	000000000000100e _ZN4Grid16alignedAllocatorINS_12StencilEntryEE10deallocateEPS1_m
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m	000000000000100f _ZN4Grid12devAllocatorISt4pairIiiEE10deallocateEPS2_m
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13MemoryManager15AcceleratorFreeEPvm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv	000000000000103f _ZN4Grid16alignedAllocatorINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEC2ERKNS_18LatticeAcceleratorISC_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE	0000000000000225 _ZN4Grid11LatticeViewINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEEC2ERKNS_18LatticeAcceleratorISD_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE8ViewOpenENS_8ViewModeE	0000000000000225 _ZN4Grid11LatticeViewINS_7iVectorINS_7iScalarINS_7iMatrixINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEEEELi4EEEE8ViewOpenENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTVN4Grid13GridCartesianE	0000000000000068 _ZTVN4Grid13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid21CartesianCommunicatorC2ERKNS_17AcceleratorVectorIiLi8EEERKS0_Ri
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian4InitERKNS_17AcceleratorVectorIiLi8EEES4_S4_	0000000000000451 _ZN4Grid13GridCartesian4InitERKNS_17AcceleratorVectorIiLi8EEES4_S4_
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE6resizeEm	00000000000000e3 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE6resizeEm
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE11SetViewModeENS_8ViewModeE	00000000000001d8 _ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE11SetViewModeENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZNK4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE	00000000000000b6 _ZNK4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEE4ViewENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev	000000000000001a _ZN4Grid10ViewCloserINS_11LatticeViewINS_7iScalarINS2_INS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEEED2Ev
 | 
					 | 
				
			||||||
00000000000001e8  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE_l166.region_id
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_	0000000000000164 _ZN4Grid16CBFromExpressionINS_11LatticeViewINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEELPSE_0EEEvRiRKT_
 | 
					 | 
				
			||||||
00000000000001e9  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE_l166.region_id
 | 
					 | 
				
			||||||
0000000000000228  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE_l166.region_id
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesianD0Ev	0000000000000012 _ZN4Grid13GridCartesianD0Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid21CartesianCommunicatorD2Ev
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian14CheckerBoardedEi	0000000000000003 _ZN4Grid13GridCartesian14CheckerBoardedEi
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian12CheckerBoardERKNS_17AcceleratorVectorIiLi8EEE	0000000000000003 _ZN4Grid13GridCartesian12CheckerBoardERKNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian23CheckerBoardDestinationEiii	0000000000000003 _ZN4Grid13GridCartesian23CheckerBoardDestinationEiii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian17CheckerBoardShiftEiiii	0000000000000003 _ZN4Grid13GridCartesian17CheckerBoardShiftEiiii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian22CheckerBoardShiftForCBEiiii	0000000000000003 _ZN4Grid13GridCartesian22CheckerBoardShiftForCBEiiii
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian22CheckerBoardFromOindexEi	0000000000000003 _ZN4Grid13GridCartesian22CheckerBoardFromOindexEi
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid13GridCartesian27CheckerBoardFromOindexTableEi	0000000000000003 _ZN4Grid13GridCartesian27CheckerBoardFromOindexTableEi
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid8GridBase6oIndexERNS_17AcceleratorVectorIiLi8EEE	0000000000000096 _ZN4Grid8GridBase6oIndexERNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid8GridBase6iIndexERNS_17AcceleratorVectorIiLi8EEE	0000000000000096 _ZN4Grid8GridBase6iIndexERNS_17AcceleratorVectorIiLi8EEE
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv	000000000000103e _ZN4Grid16alignedAllocatorINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEE8allocateEmPKv
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EENS_8ViewModeE	00000000000000c5 _ZN4Grid11LatticeViewINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEC2ERKNS_18LatticeAcceleratorISB_EENS_8ViewModeE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 cexp
 | 
					 | 
				
			||||||
0000000000000000  w    F .text._ZN4Grid11conformableINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEESC_EEvRKNS_7LatticeIT_EERKNSD_IT0_EE	0000000000000174 _ZN4Grid11conformableINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEESC_EEvRKNS_7LatticeIT_EERKNSD_IT0_EE
 | 
					 | 
				
			||||||
0000000000000208  w    O .rodata	0000000000000001 .__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD__l331.region_id
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt8ios_base4InitC1Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt8ios_base4InitD1Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 .hidden __dso_handle
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_atexit
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __tgt_register_requires
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000038 _ZTIN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE1MERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MdagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4MdirERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_ii
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE7MdirAllERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSt6vectorISK_SaISK_EE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MeooeERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE5MooeeERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooeeInvERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MeooeDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE8MooeeDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11MooeeInvDagERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE4DhopERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DhopOEERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE6DhopEOERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE7DhopDirERKNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERSK_ii
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE9DhopDerivERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11DhopDerivEOERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11DhopDerivOEERNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEERKNSE_INSG_INSF_INSF_IS8_Li3EEELi4EEEEEEESS_i
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE23MomentumSpacePropagatorERNS_7LatticeINS_7iScalarINS_7iVectorINSG_IS8_Li3EEELi4EEEEEEERKSK_dSt6vectorIdSaIdEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE11ImportGaugeERKNS_7LatticeINS_7iVectorINS_7iScalarINS_7iMatrixIS8_Li3EEEEELi4EEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEE24ContractConservedCurrentERNS_7LatticeINS_7iScalarINS_7iMatrixINSG_IS8_Li3EEELi4EEEEEEESL_SL_SL_NS_7CurrentEj
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000089 _ZTSN4Grid13WilsonFermionINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid13WilsonKernelsINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000089 _ZTSN4Grid13WilsonKernelsINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	000000000000008b _ZTSN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE	0000000000000099 _ZTSN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE	000000000000008b _ZTSN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE	0000000000000010 _ZTIN4Grid16SparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZTVN10__cxxabiv117__class_type_infoE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE	0000000000000018 _ZTIN4Grid30CheckerBoardedSparseMatrixBaseINS_7LatticeINS_7iScalarINS_7iVectorINS3_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZTVN10__cxxabiv120__si_class_type_infoE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEE	0000000000000074 _ZTSN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid17PeriodicGaugeImplINS_14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEEEE	0000000000000072 _ZTSN4Grid17PeriodicGaugeImplINS_14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEE	0000000000000059 _ZTSN4Grid14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEE	0000000000000010 _ZTIN4Grid14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid17PeriodicGaugeImplINS_14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEEEE	0000000000000018 _ZTIN4Grid17PeriodicGaugeImplINS_14GaugeImplTypesINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3ELi12EEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEE	0000000000000018 _ZTIN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000038 _ZTIN4Grid15FermionOperatorINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZTVN10__cxxabiv121__vmi_class_type_infoE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid19WilsonKernelsStaticE	000000000000001d _ZTSN4Grid19WilsonKernelsStaticE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid19WilsonKernelsStaticE	0000000000000010 _ZTIN4Grid19WilsonKernelsStaticE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid13WilsonKernelsINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE	0000000000000038 _ZTIN4Grid13WilsonKernelsINS_10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEEEEE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid19WilsonFermionStaticE	000000000000001d _ZTSN4Grid19WilsonFermionStaticE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid19WilsonFermionStaticE	0000000000000010 _ZTIN4Grid19WilsonFermionStaticE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid13GridCartesianE	0000000000000018 _ZTIN4Grid13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid13GridCartesianE	0000000000000017 _ZTSN4Grid13GridCartesianE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid8GridBaseE	0000000000000011 _ZTSN4Grid8GridBaseE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTSN4Grid10GridThreadE	0000000000000014 _ZTSN4Grid10GridThreadE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid10GridThreadE	0000000000000010 _ZTIN4Grid10GridThreadE
 | 
					 | 
				
			||||||
0000000000000000  w    O .rodata._ZTIN4Grid8GridBaseE	0000000000000038 _ZTIN4Grid8GridBaseE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZTIN4Grid21CartesianCommunicatorE
 | 
					 | 
				
			||||||
0000000000000000  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118af7__ZN4Grid10WilsonImplINS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEENS_14FundamentalRepILi3EEENS_9CoeffRealEE13multLinkFieldINS_7LatticeINS_7iScalarINS_7iMatrixINSF_IS7_Li3EEELi4EEEEEEEEEvRT_RKNSD_INS_7iVectorINSE_ISG_EELi8EEEEERKSK_i_l116
 | 
					 | 
				
			||||||
0000000000000020  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iMatrixINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinarySubENS_23LatticeBinaryExpressionINS_9BinaryMulESD_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEENSG_ISH_NSG_ISH_NS_5GammaESD_EESL_EEEERSD_RKNSG_IT_T0_T1_EE_l166
 | 
					 | 
				
			||||||
0000000000000040  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS1_INS1_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEEEEEEEEEaSINS_9BinaryAddESC_NS_23LatticeBinaryExpressionINS_9BinaryMulENSF_ISG_dSC_EEdEEEERSC_RKNSF_IT_T0_T1_EE_l166
 | 
					 | 
				
			||||||
0000000000000060  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulENS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_NSI_ISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEdEEEESD_EERSD_RKNSI_IT_T0_T1_EE_l166
 | 
					 | 
				
			||||||
0000000000000080  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSERKSD__l331
 | 
					 | 
				
			||||||
00000000000000a0  w    O omp_offloading_entries	0000000000000020 .omp_offloading.entry.__omp_offloading_73_1e118ab9__ZN4Grid7LatticeINS_7iScalarINS_7iVectorINS2_INS_9Grid_simdISt7complexIdENS_12Optimization3vecIdEEEELi3EEELi4EEEEEEaSINS_9BinaryMulESD_NS_22LatticeUnaryExpressionINS_8UnaryExpENS_23LatticeBinaryExpressionISF_S5_NS0_INS1_INS1_INS1_IS9_EEEEEEEEEEEEEERSD_RKNSI_IT_T0_T1_EE_l166
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __gxx_personality_v0
 | 
					 | 
				
			||||||
@@ -1,16 +0,0 @@
 | 
				
			|||||||
 | 
					 | 
				
			||||||
libWilsonFermionWorks3.a(WilsonFermionWorks3.o):	file format elf64-x86-64
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SYMBOL TABLE:
 | 
					 | 
				
			||||||
0000000000000000 l    df *ABS*	0000000000000000 WilsonFermionWorks3.cc
 | 
					 | 
				
			||||||
0000000000000000 l    d  .text.startup	0000000000000000 .text.startup
 | 
					 | 
				
			||||||
0000000000000000 l     F .text.startup	0000000000000021 _GLOBAL__sub_I_WilsonFermionWorks3.cc
 | 
					 | 
				
			||||||
0000000000000000 l     O .bss	0000000000000001 _ZStL8__ioinit
 | 
					 | 
				
			||||||
0000000000000000 l    d  .bss	0000000000000000 .bss
 | 
					 | 
				
			||||||
0000000000000001 l     O .bss	0000000000000001 _ZN5EigenL4lastE
 | 
					 | 
				
			||||||
0000000000000002 l     O .bss	0000000000000002 _ZN5EigenL6lastp1E
 | 
					 | 
				
			||||||
0000000000000004 l     O .bss	0000000000000001 _ZN5EigenL3allE
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt8ios_base4InitC1Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 _ZNSt8ios_base4InitD1Ev
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 .hidden __dso_handle
 | 
					 | 
				
			||||||
0000000000000000         *UND*	0000000000000000 __cxa_atexit
 | 
					 | 
				
			||||||
@@ -96,9 +96,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
    pokeSite(tmp,src,origin);
 | 
					    pokeSite(tmp,src,origin);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
  std::cout << GridLogMessage << "Drawing gauge field1" << std::endl;
 | 
					 | 
				
			||||||
  RealD N2 = 1.0/::sqrt(norm2(src));
 | 
					  RealD N2 = 1.0/::sqrt(norm2(src));
 | 
				
			||||||
  std::cout << GridLogMessage << "Drawing gauge field3" << std::endl;
 | 
					 | 
				
			||||||
  src = src*N2;
 | 
					  src = src*N2;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -217,12 +215,8 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
    std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl;
 | 
					    std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl;
 | 
				
			||||||
    std::cout<<GridLogMessage << "RF  GiB/s (base 2) =   "<< 1000000. * data_rf/((t1-t0))<<std::endl;
 | 
					    std::cout<<GridLogMessage << "RF  GiB/s (base 2) =   "<< 1000000. * data_rf/((t1-t0))<<std::endl;
 | 
				
			||||||
    std::cout<<GridLogMessage << "mem GiB/s (base 2) =   "<< 1000000. * data_mem/((t1-t0))<<std::endl;
 | 
					    std::cout<<GridLogMessage << "mem GiB/s (base 2) =   "<< 1000000. * data_mem/((t1-t0))<<std::endl;
 | 
				
			||||||
//#pragma omp target is_device_ptr ( err.View(CpuWrite), ref.View(CpuWrite), result.View(CpuWrite) )
 | 
					    err = ref-result;
 | 
				
			||||||
    ref-result;
 | 
					    std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
 | 
				
			||||||
    //err = ref-result;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    std::cout<<GridLogMessage << "norm diff 0  "<<std::endl;
 | 
					 | 
				
			||||||
    std::cout<<GridLogMessage << "norm diff   "<< norm2(err) << " norm diff 1 " <<std::endl;
 | 
					 | 
				
			||||||
    //exit(0);
 | 
					    //exit(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(( norm2(err)>1.0e-4) ) {
 | 
					    if(( norm2(err)>1.0e-4) ) {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -36,22 +36,19 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
  Grid_init(&argc,&argv);
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define LMAX (8)
 | 
					#define LMAX (40)
 | 
				
			||||||
#define LMIN (8)
 | 
					#define LMIN (8)
 | 
				
			||||||
#define LADD (8)
 | 
					#define LADD (8)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int64_t Nwarm=500;
 | 
					  int64_t Nwarm=10;
 | 
				
			||||||
  int64_t Nloop=1500;
 | 
					  int64_t Nloop=100;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
					  Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
				
			||||||
  Coordinate mpi_layout  = GridDefaultMpi();
 | 
					  Coordinate mpi_layout  = GridDefaultMpi();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int64_t threads = GridThread::GetThreads();
 | 
					  int64_t threads = GridThread::GetThreads();
 | 
				
			||||||
  int64_t accelerator_threads = acceleratorThreads();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  std::cout<<GridLogMessage << "Grid is setup with LMAX="<< LMAX << ", LMIN=" << LMIN << ", LADD=" << LADD << ", Nwarm, Nloop =" << Nwarm <<"," << Nloop <<std::endl;
 | 
					 | 
				
			||||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<accelerator_threads<<" GPU threads"<<std::endl;
 | 
					
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  x= x*y"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  x= x*y"<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
@@ -225,7 +222,6 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if 1
 | 
					 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
@@ -258,9 +254,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
 | 
						    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
#endif
 | 
					#if 1
 | 
				
			||||||
 | 
					 | 
				
			||||||
#if 0
 | 
					 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										968
									
								
								benchmarks/Benchmark_usqcd.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										968
									
								
								benchmarks/Benchmark_usqcd.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,968 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./benchmarks/Benchmark_usqcd.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					#include <Grid/algorithms/blas/BatchedBlas.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// File-scope state shared by the benchmark routines in this file.
					// NOTE(review): the producers/consumers of the lists and mflop_* values are
					// not visible in this part of the file - confirm their meaning at the use sites.
					std::vector<int> L_list, Ls_list;
					std::vector<double> mflop_list;

					double mflop_ref, mflop_ref_err;

					int NN_global;

					// Report stream; the benchmark routines write their tables to it with fprintf.
					FILE *FP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Summary statistics of a set of timing samples.
					struct time_statistics{
					  double mean; // arithmetic mean of the samples
					  double err;  // standard error of the mean: sqrt( sum (x-mean)^2 / (N*(N-1)) )
					  double min;  // smallest sample
					  double max;  // largest sample

					  // Fill mean, err, min and max from the samples in v.
					  //
					  // Degenerate inputs are handled explicitly instead of producing NaN or
					  // dereferencing an end iterator:
					  //   - empty v      : all four members are set to 0
					  //   - one sample   : err is 0 (no spread can be estimated)
					  void statistics(const std::vector<double> &v){
					      const std::size_t n = v.size();
					      if ( n == 0 ) {
					        mean = err = min = max = 0.0;
					        return;
					      }

					      // Single pass for sum and extrema.
					      double sum = 0.0;
					      min = max = v[0];
					      for(std::size_t i=0;i<n;i++){
					        sum += v[i];
					        if ( v[i] < min ) min = v[i];
					        if ( v[i] > max ) max = v[i];
					      }
					      mean = sum / n;

					      if ( n < 2 ) {
					        err = 0.0; // N*(N-1) would be zero
					        return;
					      }

					      // Sum of squared deviations from the mean.
					      double sq_sum = 0.0;
					      for(std::size_t i=0;i<n;i++){
					        const double d = v[i] - mean;
					        sq_sum += d*d;
					      }
					      err = std::sqrt(sq_sum / (double(n)*double(n - 1)));
					}
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Print the column titles for the communications bandwidth table.
					void comms_header(){
					  auto &out = (std::cout << GridLogMessage);
					  out << " L  " << "\t" << " Ls  " << "\t";
					  out << "bytes\t MB/s uni  \t\t MB/s bidi " << std::endl;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct controls {
 | 
				
			||||||
 | 
					  int Opt;
 | 
				
			||||||
 | 
					  int CommsOverlap;
 | 
				
			||||||
 | 
					  Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Benchmark {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  static void Decomposition (void ) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "= Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\tMPI tasks      : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\tvReal          : "<<sizeof(vReal )*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vReal::Nsimd()))<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\tvRealF         : "<<sizeof(vRealF)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\tvRealD         : "<<sizeof(vRealD)*8    <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\tvComplex       : "<<sizeof(vComplex )*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplex::Nsimd()))<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\tvComplexF      : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\tvComplexD      : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void Comms(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int Nloop=200;
 | 
				
			||||||
 | 
					    int nmu=0;
 | 
				
			||||||
 | 
					    int maxlat=32;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
 | 
				
			||||||
 | 
					    Coordinate mpi_layout  = GridDefaultMpi();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<double> t_time(Nloop);
 | 
				
			||||||
 | 
					    time_statistics timestat;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "= Benchmarking threaded STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    comms_header();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fprintf(FP,"Communications\n\n");
 | 
				
			||||||
 | 
					    fprintf(FP,"Packet bytes, direction, GB/s per node\n");
 | 
				
			||||||
 | 
					    for(int lat=16;lat<=maxlat;lat+=8){
 | 
				
			||||||
 | 
					      //      for(int Ls=8;Ls<=8;Ls*=2){
 | 
				
			||||||
 | 
					      { int Ls=12;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Coordinate latt_size  ({lat*mpi_layout[0],
 | 
				
			||||||
 | 
						      lat*mpi_layout[1],
 | 
				
			||||||
 | 
						      lat*mpi_layout[2],
 | 
				
			||||||
 | 
						      lat*mpi_layout[3]});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
						RealD Nrank = Grid._Nprocessors;
 | 
				
			||||||
 | 
						RealD Nnode = Grid.NodeCount();
 | 
				
			||||||
 | 
						RealD ppn = Nrank/Nnode;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						std::vector<HalfSpinColourVectorD *> xbuf(8);
 | 
				
			||||||
 | 
						std::vector<HalfSpinColourVectorD *> rbuf(8);
 | 
				
			||||||
 | 
						//Grid.ShmBufferFreeAll();
 | 
				
			||||||
 | 
						uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
				
			||||||
 | 
						for(int d=0;d<8;d++){
 | 
				
			||||||
 | 
						  xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
 | 
				
			||||||
 | 
						  rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
 | 
				
			||||||
 | 
						  //	  bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
 | 
						  //	  bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//	int ncomm;
 | 
				
			||||||
 | 
						double dbytes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for(int dir=0;dir<8;dir++) {
 | 
				
			||||||
 | 
						  int mu =dir % 4;
 | 
				
			||||||
 | 
						  if (mpi_layout[mu]>1 ) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    std::vector<double> times(Nloop);
 | 
				
			||||||
 | 
						    for(int i=0;i<Nloop;i++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						      dbytes=0;	        
 | 
				
			||||||
 | 
						      double start=usecond();
 | 
				
			||||||
 | 
						      int xmit_to_rank;
 | 
				
			||||||
 | 
						      int recv_from_rank;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						      if ( dir == mu ) { 
 | 
				
			||||||
 | 
							int comm_proc=1;
 | 
				
			||||||
 | 
							Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
				
			||||||
 | 
						      } else { 
 | 
				
			||||||
 | 
							int comm_proc = mpi_layout[mu]-1;
 | 
				
			||||||
 | 
							Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
				
			||||||
 | 
						      }
 | 
				
			||||||
 | 
						      Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
 | 
				
			||||||
 | 
									  (void *)&rbuf[dir][0], recv_from_rank,
 | 
				
			||||||
 | 
									  bytes);
 | 
				
			||||||
 | 
						      dbytes+=bytes;
 | 
				
			||||||
 | 
						     
 | 
				
			||||||
 | 
						      double stop=usecond();
 | 
				
			||||||
 | 
						      t_time[i] = stop-start; // microseconds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    }
 | 
				
			||||||
 | 
						    timestat.statistics(t_time);
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						    dbytes=dbytes*ppn;
 | 
				
			||||||
 | 
						    double xbytes    = dbytes*0.5;
 | 
				
			||||||
 | 
						    double bidibytes = dbytes;
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						    std::cout<<GridLogMessage << lat<<"\t"<<Ls<<"\t "
 | 
				
			||||||
 | 
							     << bytes << " \t "
 | 
				
			||||||
 | 
							     <<xbytes/timestat.mean
 | 
				
			||||||
 | 
							     << "\t\t"
 | 
				
			||||||
 | 
							     << bidibytes/timestat.mean<< std::endl;
 | 
				
			||||||
 | 
						    fprintf(FP,"%ld, %d, %f\n",(long)bytes,dir,bidibytes/timestat.mean/1000.);
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						for(int d=0;d<8;d++){
 | 
				
			||||||
 | 
						  acceleratorFreeDevice(xbuf[d]);
 | 
				
			||||||
 | 
						  acceleratorFreeDevice(rbuf[d]);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(FP,"\n\n");
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  static void Memory(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    const int Nvec=8;
 | 
				
			||||||
 | 
					    typedef Lattice< iVector< vReal,Nvec> > LatticeVec;
 | 
				
			||||||
 | 
					    typedef iVector<vReal,Nvec> Vec;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Coordinate simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
 | 
				
			||||||
 | 
					    Coordinate mpi_layout  = GridDefaultMpi();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fprintf(FP,"Memory Bandwidth\n\n");
 | 
				
			||||||
 | 
					    fprintf(FP,"Bytes, GB/s per node\n");
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "= Benchmarking a*x + y bandwidth"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					    //    uint64_t NP;
 | 
				
			||||||
 | 
					    uint64_t NN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  uint64_t lmax=40;
 | 
				
			||||||
 | 
					#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
 | 
				
			||||||
 | 
					    for(int lat=8;lat<=lmax;lat+=8){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      Coordinate latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
 | 
				
			||||||
 | 
					      int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      //      NP= Grid.RankCount();
 | 
				
			||||||
 | 
					      NN =Grid.NodeCount();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      Vec rn ; random(sRNG,rn);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      LatticeVec z(&Grid); z=Zero();
 | 
				
			||||||
 | 
					      LatticeVec x(&Grid); x=Zero();
 | 
				
			||||||
 | 
					      LatticeVec y(&Grid); y=Zero();
 | 
				
			||||||
 | 
					      double a=2.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      uint64_t Nloop=NLOOP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      double start=usecond();
 | 
				
			||||||
 | 
					      for(int i=0;i<Nloop;i++){
 | 
				
			||||||
 | 
						z=a*x-y;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      double stop=usecond();
 | 
				
			||||||
 | 
					      double time = (stop-start)/Nloop*1000;
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
 | 
					      double flops=vol*Nvec*2;// mul,add
 | 
				
			||||||
 | 
					      double bytes=3.0*vol*Nvec*sizeof(Real);
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<<std::setprecision(3) 
 | 
				
			||||||
 | 
						       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.
 | 
				
			||||||
 | 
						       << "\t\t"<< bytes/time/NN <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      fprintf(FP,"%ld, %f\n",(long)bytes,bytes/time/NN);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(FP,"\n\n");
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void BLAS(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    //int nbasis, int nrhs, int coarseVol
 | 
				
			||||||
 | 
					    int  basis[] = { 16,32,64 };
 | 
				
			||||||
 | 
					    int  rhs[]   = { 8,16,32 };
 | 
				
			||||||
 | 
					    int  vol  = 4*4*4*4;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GridBLAS blas;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "= batched GEMM (double precision) "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "  M  "<<"\t\t"<<"N"<<"\t\t\t"<<"K"<<"\t\t"<<"Gflop/s / rank (coarse mrhs)"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					    fprintf(FP,"GEMM\n\n M, N, K, BATCH, GF/s per rank\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int b=0;b<3;b++){
 | 
				
			||||||
 | 
					    for(int r=0;r<3;r++){
 | 
				
			||||||
 | 
					      int M=basis[b];
 | 
				
			||||||
 | 
					      int N=rhs[r];
 | 
				
			||||||
 | 
					      int K=basis[b];
 | 
				
			||||||
 | 
					      int BATCH=vol;
 | 
				
			||||||
 | 
					      double p=blas.benchmark(M,N,K,BATCH);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      fprintf(FP,"%d, %d, %d, %d, %f\n", M, N, K, BATCH, p);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<<std::setprecision(3) 
 | 
				
			||||||
 | 
						       << M<<"\t\t"<<N<<"\t\t"<<K<<"\t\t"<<BATCH<<"\t\t"<<p<<std::endl;
 | 
				
			||||||
 | 
					    }}
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "  M  "<<"\t\t"<<"N"<<"\t\t\t"<<"K"<<"\t\t"<<"Gflop/s / rank (block project)"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					    for(int b=0;b<3;b++){
 | 
				
			||||||
 | 
					    for(int r=0;r<3;r++){
 | 
				
			||||||
 | 
					      int M=basis[b];
 | 
				
			||||||
 | 
					      int N=rhs[r];
 | 
				
			||||||
 | 
					      int K=vol;
 | 
				
			||||||
 | 
					      int BATCH=vol;
 | 
				
			||||||
 | 
					      double p=blas.benchmark(M,N,K,BATCH);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      fprintf(FP,"%d, %d, %d, %d, %f\n", M, N, K, BATCH, p);
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<<std::setprecision(3) 
 | 
				
			||||||
 | 
						       << M<<"\t\t"<<N<<"\t\t"<<K<<"\t\t"<<BATCH<<"\t\t"<<p<<std::endl;
 | 
				
			||||||
 | 
					    }}
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "  M  "<<"\t\t"<<"N"<<"\t\t\t"<<"K"<<"\t\t"<<"Gflop/s / rank (block promote)"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					    for(int b=0;b<3;b++){
 | 
				
			||||||
 | 
					    for(int r=0;r<3;r++){
 | 
				
			||||||
 | 
					      int M=rhs[r];
 | 
				
			||||||
 | 
					      int N=vol;
 | 
				
			||||||
 | 
					      int K=basis[b];
 | 
				
			||||||
 | 
					      int BATCH=vol;
 | 
				
			||||||
 | 
					      double p=blas.benchmark(M,N,K,BATCH);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      fprintf(FP,"%d, %d, %d, %d, %f\n", M, N, K, BATCH, p);
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<<std::setprecision(3) 
 | 
				
			||||||
 | 
						       << M<<"\t\t"<<N<<"\t\t"<<K<<"\t\t"<<BATCH<<"\t\t"<<p<<std::endl;
 | 
				
			||||||
 | 
					    }}
 | 
				
			||||||
 | 
					    fprintf(FP,"\n\n\n");
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void SU4(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    const int Nc4=4;
 | 
				
			||||||
 | 
					    typedef Lattice< iMatrix< vComplexF,Nc4> > LatticeSU4;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Coordinate simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
 | 
				
			||||||
 | 
					    Coordinate mpi_layout  = GridDefaultMpi();
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					    uint64_t NN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    uint64_t lmax=32;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GridSerialRNG          sRNG;      sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
 | 
				
			||||||
 | 
					    for(int lat=8;lat<=lmax;lat+=8){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      Coordinate latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
 | 
				
			||||||
 | 
					      int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      NN =Grid.NodeCount();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      LatticeSU4 z(&Grid); z=Zero();
 | 
				
			||||||
 | 
					      LatticeSU4 x(&Grid); x=Zero();
 | 
				
			||||||
 | 
					      LatticeSU4 y(&Grid); y=Zero();
 | 
				
			||||||
 | 
					      //      double a=2.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      uint64_t Nloop=NLOOP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      double start=usecond();
 | 
				
			||||||
 | 
					      for(int i=0;i<Nloop;i++){
 | 
				
			||||||
 | 
						z=x*y;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      double stop=usecond();
 | 
				
			||||||
 | 
					      double time = (stop-start)/Nloop*1000;
 | 
				
			||||||
 | 
					     
 | 
				
			||||||
 | 
					      double flops=vol*Nc4*Nc4*(6+(Nc4-1)*8);// mul,add
 | 
				
			||||||
 | 
					      double bytes=3.0*vol*Nc4*Nc4*2*sizeof(RealF);
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<<std::setprecision(3) 
 | 
				
			||||||
 | 
						       << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.
 | 
				
			||||||
 | 
						       << "\t\t"<< bytes/time/NN <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static double DWF(int Ls,int L)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    RealD mass=0.1;
 | 
				
			||||||
 | 
					    RealD M5  =1.8;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    double mflops;
 | 
				
			||||||
 | 
					    double mflops_best = 0;
 | 
				
			||||||
 | 
					    double mflops_worst= 0;
 | 
				
			||||||
 | 
					    std::vector<double> mflops_all;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Set/Get the layout & grid size
 | 
				
			||||||
 | 
					    ///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					    Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4);
 | 
				
			||||||
 | 
					    Coordinate local({L,L,L,L});
 | 
				
			||||||
 | 
					    Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, 
 | 
				
			||||||
 | 
													       GridDefaultSimd(Nd,vComplex::Nsimd()),
 | 
				
			||||||
 | 
													       GridDefaultMpi());
 | 
				
			||||||
 | 
					    uint64_t NP = TmpGrid->RankCount();
 | 
				
			||||||
 | 
					    uint64_t NN = TmpGrid->NodeCount();
 | 
				
			||||||
 | 
					    NN_global=NN;
 | 
				
			||||||
 | 
					    uint64_t SHM=NP/NN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////// Welcome message ////////////
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* Nc             : "<<Nc<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks          : "<<NP  <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* nodes          : "<<NN  <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks/node     : "<<SHM <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks geom     : "<<GridCmdVectorIntToString(mpi)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////// Lattice Init ////////////
 | 
				
			||||||
 | 
					    GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
 | 
				
			||||||
 | 
					    GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
				
			||||||
 | 
					    GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					    GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ///////// RNG Init ////////////
 | 
				
			||||||
 | 
					    std::vector<int> seeds4({1,2,3,4});
 | 
				
			||||||
 | 
					    std::vector<int> seeds5({5,6,7,8});
 | 
				
			||||||
 | 
					    GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
				
			||||||
 | 
					    GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    typedef DomainWallFermionF Action;
 | 
				
			||||||
 | 
					    typedef typename Action::FermionField Fermion;
 | 
				
			||||||
 | 
					    typedef LatticeGaugeFieldF Gauge;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ///////// Source preparation ////////////
 | 
				
			||||||
 | 
					    Gauge Umu(UGrid);  SU<Nc>::HotConfiguration(RNG4,Umu); 
 | 
				
			||||||
 | 
					    Fermion src   (FGrid); random(RNG5,src);
 | 
				
			||||||
 | 
					    Fermion src_e (FrbGrid);
 | 
				
			||||||
 | 
					    Fermion src_o (FrbGrid);
 | 
				
			||||||
 | 
					    Fermion r_e   (FrbGrid);
 | 
				
			||||||
 | 
					    Fermion r_o   (FrbGrid);
 | 
				
			||||||
 | 
					    Fermion r_eo  (FGrid);
 | 
				
			||||||
 | 
					    Action Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      pickCheckerboard(Even,src_e,src);
 | 
				
			||||||
 | 
					      pickCheckerboard(Odd,src_o,src);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef AVX512
 | 
				
			||||||
 | 
					      const int num_cases = 3;
 | 
				
			||||||
 | 
					#else 
 | 
				
			||||||
 | 
					      const int num_cases = 2;
 | 
				
			||||||
 | 
					#endif      
 | 
				
			||||||
 | 
					      std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      controls Cases [] = {
 | 
				
			||||||
 | 
						{  WilsonKernelsStatic::OptGeneric   ,  WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicyConcurrent },
 | 
				
			||||||
 | 
						{  WilsonKernelsStatic::OptHandUnroll,  WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicyConcurrent },
 | 
				
			||||||
 | 
						{  WilsonKernelsStatic::OptInlineAsm ,  WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicyConcurrent }
 | 
				
			||||||
 | 
					      }; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int c=0;c<num_cases;c++) {
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
 | 
				
			||||||
 | 
						WilsonKernelsStatic::Opt   = Cases[c].Opt;
 | 
				
			||||||
 | 
						CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
						if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
 | 
				
			||||||
 | 
						if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using ASM      WilsonKernels" <<std::endl;
 | 
				
			||||||
 | 
						if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using UNROLLED WilsonKernels" <<std::endl;
 | 
				
			||||||
 | 
						if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
 | 
				
			||||||
 | 
						if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential Comms/Compute" <<std::endl;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						int nwarm = 10;
 | 
				
			||||||
 | 
						double t0=usecond();
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						for(int i=0;i<nwarm;i++){
 | 
				
			||||||
 | 
						  Dw.DhopEO(src_o,r_e,DaggerNo);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						double t1=usecond();
 | 
				
			||||||
 | 
						uint64_t ncall = 500;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						FGrid->Broadcast(0,&ncall,sizeof(ncall));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						time_statistics timestat;
 | 
				
			||||||
 | 
						std::vector<double> t_time(ncall);
 | 
				
			||||||
 | 
						for(uint64_t i=0;i<ncall;i++){
 | 
				
			||||||
 | 
						  t0=usecond();
 | 
				
			||||||
 | 
						  Dw.DhopEO(src_o,r_e,DaggerNo);
 | 
				
			||||||
 | 
						  t1=usecond();
 | 
				
			||||||
 | 
						  t_time[i] = t1-t0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Nc=3 gives
 | 
				
			||||||
 | 
						// 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
 | 
				
			||||||
 | 
						// 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2  + Nd*Nc*Ns*2
 | 
				
			||||||
 | 
						//	double flops=(1344.0*volume)/2;
 | 
				
			||||||
 | 
						double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + 2*Nd*Nc*Ns  + 2*Nd*Nc*Ns*2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						double flops=(fps*volume)/2;
 | 
				
			||||||
 | 
						double mf_hi, mf_lo, mf_err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						timestat.statistics(t_time);
 | 
				
			||||||
 | 
						mf_hi = flops/timestat.min;
 | 
				
			||||||
 | 
						mf_lo = flops/timestat.max;
 | 
				
			||||||
 | 
						mf_err= flops/timestat.min * timestat.err/timestat.mean;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mflops = flops/timestat.mean;
 | 
				
			||||||
 | 
						mflops_all.push_back(mflops);
 | 
				
			||||||
 | 
						if ( mflops_best == 0   ) mflops_best = mflops;
 | 
				
			||||||
 | 
						if ( mflops_worst== 0   ) mflops_worst= mflops;
 | 
				
			||||||
 | 
						if ( mflops>mflops_best ) mflops_best = mflops;
 | 
				
			||||||
 | 
						if ( mflops<mflops_worst) mflops_worst= mflops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage<< "Deo FlopsPerSite is "<<fps<<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << L<<"^4 x "<<Ls<< " Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage <<fmt << std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int i=0;i<mflops_all.size();i++){
 | 
				
			||||||
 | 
						std::cout<<mflops_all[i]/NN<<" ; " ;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      std::cout<<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return mflops_best;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static double Staggered(int L)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    double mflops;
 | 
				
			||||||
 | 
					    double mflops_best = 0;
 | 
				
			||||||
 | 
					    double mflops_worst= 0;
 | 
				
			||||||
 | 
					    std::vector<double> mflops_all;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Set/Get the layout & grid size
 | 
				
			||||||
 | 
					    ///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					    Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4);
 | 
				
			||||||
 | 
					    Coordinate local({L,L,L,L});
 | 
				
			||||||
 | 
					    Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]});
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(latt4,
 | 
				
			||||||
 | 
													       GridDefaultSimd(Nd,vComplex::Nsimd()),
 | 
				
			||||||
 | 
													       GridDefaultMpi());
 | 
				
			||||||
 | 
					    uint64_t NP = TmpGrid->RankCount();
 | 
				
			||||||
 | 
					    uint64_t NN = TmpGrid->NodeCount();
 | 
				
			||||||
 | 
					    NN_global=NN;
 | 
				
			||||||
 | 
					    uint64_t SHM=NP/NN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////// Welcome message ////////////
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "Benchmark ImprovedStaggered on "<<L<<"^4 local volume "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks          : "<<NP  <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* nodes          : "<<NN  <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks/node     : "<<SHM <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks geom     : "<<GridCmdVectorIntToString(mpi)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////// Lattice Init ////////////
 | 
				
			||||||
 | 
					    GridCartesian         * FGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
 | 
				
			||||||
 | 
					    GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ///////// RNG Init ////////////
 | 
				
			||||||
 | 
					    std::vector<int> seeds4({1,2,3,4});
 | 
				
			||||||
 | 
					    GridParallelRNG          RNG4(FGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD mass=0.1;
 | 
				
			||||||
 | 
					    RealD c1=9.0/8.0;
 | 
				
			||||||
 | 
					    RealD c2=-1.0/24.0;
 | 
				
			||||||
 | 
					    RealD u0=1.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    typedef ImprovedStaggeredFermionF Action;
 | 
				
			||||||
 | 
					    typedef typename Action::FermionField Fermion; 
 | 
				
			||||||
 | 
					    typedef LatticeGaugeFieldF Gauge;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Gauge Umu(FGrid);  SU<Nc>::HotConfiguration(RNG4,Umu); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    typename Action::ImplParams params;
 | 
				
			||||||
 | 
					    Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////// Source preparation ////////////
 | 
				
			||||||
 | 
					    Fermion src   (FGrid); random(RNG4,src);
 | 
				
			||||||
 | 
					    Fermion src_e (FrbGrid);
 | 
				
			||||||
 | 
					    Fermion src_o (FrbGrid);
 | 
				
			||||||
 | 
					    Fermion r_e   (FrbGrid);
 | 
				
			||||||
 | 
					    Fermion r_o   (FrbGrid);
 | 
				
			||||||
 | 
					    Fermion r_eo  (FGrid);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      pickCheckerboard(Even,src_e,src);
 | 
				
			||||||
 | 
					      pickCheckerboard(Odd,src_o,src);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					      const int num_cases = 2;
 | 
				
			||||||
 | 
					      std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      controls Cases [] = {
 | 
				
			||||||
 | 
						{  StaggeredKernelsStatic::OptGeneric   ,  StaggeredKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicyConcurrent  },
 | 
				
			||||||
 | 
						{  StaggeredKernelsStatic::OptHandUnroll,  StaggeredKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicyConcurrent  },
 | 
				
			||||||
 | 
						{  StaggeredKernelsStatic::OptInlineAsm ,  StaggeredKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicyConcurrent  }
 | 
				
			||||||
 | 
					      }; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int c=0;c<num_cases;c++) {
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						StaggeredKernelsStatic::Comms = Cases[c].CommsOverlap;
 | 
				
			||||||
 | 
						StaggeredKernelsStatic::Opt   = Cases[c].Opt;
 | 
				
			||||||
 | 
						CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
						if ( StaggeredKernelsStatic::Opt == StaggeredKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc StaggeredKernels" <<std::endl;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						int nwarm = 10;
 | 
				
			||||||
 | 
						double t0=usecond();
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						for(int i=0;i<nwarm;i++){
 | 
				
			||||||
 | 
						  Ds.DhopEO(src_o,r_e,DaggerNo);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						double t1=usecond();
 | 
				
			||||||
 | 
						uint64_t ncall = 500;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						FGrid->Broadcast(0,&ncall,sizeof(ncall));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						time_statistics timestat;
 | 
				
			||||||
 | 
						std::vector<double> t_time(ncall);
 | 
				
			||||||
 | 
						for(uint64_t i=0;i<ncall;i++){
 | 
				
			||||||
 | 
						  t0=usecond();
 | 
				
			||||||
 | 
						  Ds.DhopEO(src_o,r_e,DaggerNo);
 | 
				
			||||||
 | 
						  t1=usecond();
 | 
				
			||||||
 | 
						  t_time[i] = t1-t0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						double volume=1;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
				
			||||||
 | 
						double flops=(1146.0*volume)/2;
 | 
				
			||||||
 | 
						double mf_hi, mf_lo, mf_err;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						timestat.statistics(t_time);
 | 
				
			||||||
 | 
						mf_hi = flops/timestat.min;
 | 
				
			||||||
 | 
						mf_lo = flops/timestat.max;
 | 
				
			||||||
 | 
						mf_err= flops/timestat.min * timestat.err/timestat.mean;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mflops = flops/timestat.mean;
 | 
				
			||||||
 | 
						mflops_all.push_back(mflops);
 | 
				
			||||||
 | 
						if ( mflops_best == 0   ) mflops_best = mflops;
 | 
				
			||||||
 | 
						if ( mflops_worst== 0   ) mflops_worst= mflops;
 | 
				
			||||||
 | 
						if ( mflops>mflops_best ) mflops_best = mflops;
 | 
				
			||||||
 | 
						if ( mflops<mflops_worst) mflops_worst= mflops;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank   "<< mflops/NP<<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node   "<< mflops/NN<<std::endl;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << L<<"^4  Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << L<<"^4  Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage <<fmt << std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int i=0;i<mflops_all.size();i++){
 | 
				
			||||||
 | 
						std::cout<<mflops_all[i]/NN<<" ; " ;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      std::cout<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    return mflops_best;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static double Clover(int L)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    double mflops;
 | 
				
			||||||
 | 
					    double mflops_best = 0;
 | 
				
			||||||
 | 
					    double mflops_worst= 0;
 | 
				
			||||||
 | 
					    std::vector<double> mflops_all;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Set/Get the layout & grid size
 | 
				
			||||||
 | 
					    ///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					    Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4);
 | 
				
			||||||
 | 
					    Coordinate local({L,L,L,L});
 | 
				
			||||||
 | 
					    Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]});
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    GridCartesian         * TmpGrid   = SpaceTimeGrid::makeFourDimGrid(latt4,
 | 
				
			||||||
 | 
													       GridDefaultSimd(Nd,vComplex::Nsimd()),
 | 
				
			||||||
 | 
													       GridDefaultMpi());
 | 
				
			||||||
 | 
					    uint64_t NP = TmpGrid->RankCount();
 | 
				
			||||||
 | 
					    uint64_t NN = TmpGrid->NodeCount();
 | 
				
			||||||
 | 
					    NN_global=NN;
 | 
				
			||||||
 | 
					    uint64_t SHM=NP/NN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////// Welcome message ////////////
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "Benchmark Clover on "<<L<<"^4 local volume "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks          : "<<NP  <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* nodes          : "<<NN  <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks/node     : "<<SHM <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* ranks geom     : "<<GridCmdVectorIntToString(mpi)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "* Using "<<threads<<" threads"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////// Lattice Init ////////////
 | 
				
			||||||
 | 
					    GridCartesian         * FGrid   = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
 | 
				
			||||||
 | 
					    GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ///////// RNG Init ////////////
 | 
				
			||||||
 | 
					    std::vector<int> seeds4({1,2,3,4});
 | 
				
			||||||
 | 
					    GridParallelRNG          RNG4(FGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD mass=0.1;
 | 
				
			||||||
 | 
					    RealD csw=1.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    typedef WilsonCloverFermionF Action;
 | 
				
			||||||
 | 
					    typedef typename Action::FermionField Fermion; 
 | 
				
			||||||
 | 
					    typedef LatticeGaugeFieldF Gauge;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Gauge Umu(FGrid);  SU<Nc>::HotConfiguration(RNG4,Umu); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Action Dc(Umu,*FGrid,*FrbGrid,mass,csw,csw);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////// Source preparation ////////////
 | 
				
			||||||
 | 
					    Fermion src   (FGrid); random(RNG4,src);
 | 
				
			||||||
 | 
					    Fermion r     (FGrid);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      const int num_cases = 1;
 | 
				
			||||||
 | 
					      std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S ");
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      controls Cases [] = {
 | 
				
			||||||
 | 
						{  WilsonKernelsStatic::OptGeneric   ,  WilsonKernelsStatic::CommsAndCompute  ,CartesianCommunicator::CommunicatorPolicyConcurrent  },
 | 
				
			||||||
 | 
					      }; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int c=0;c<num_cases;c++) {
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						WilsonKernelsStatic::Comms = Cases[c].CommsOverlap;
 | 
				
			||||||
 | 
						WilsonKernelsStatic::Opt   = Cases[c].Opt;
 | 
				
			||||||
 | 
						CartesianCommunicator::SetCommunicatorPolicy(Cases[c].CommsAsynch);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						int nwarm = 10;
 | 
				
			||||||
 | 
						double t0=usecond();
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						for(int i=0;i<nwarm;i++){
 | 
				
			||||||
 | 
						  Dc.M(src,r);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						double t1=usecond();
 | 
				
			||||||
 | 
						uint64_t ncall = 500;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						FGrid->Broadcast(0,&ncall,sizeof(ncall));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//	std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						time_statistics timestat;
 | 
				
			||||||
 | 
						std::vector<double> t_time(ncall);
 | 
				
			||||||
 | 
						for(uint64_t i=0;i<ncall;i++){
 | 
				
			||||||
 | 
						  t0=usecond();
 | 
				
			||||||
 | 
						  Dc.M(src,r);
 | 
				
			||||||
 | 
						  t1=usecond();
 | 
				
			||||||
 | 
						  t_time[i] = t1-t0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						FGrid->Barrier();
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						double volume=1;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
 | 
				
			||||||
 | 
						double flops=(1344+ 24+6*6*8*2)*volume;
 | 
				
			||||||
 | 
						double mf_hi, mf_lo, mf_err;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						timestat.statistics(t_time);
 | 
				
			||||||
 | 
						mf_hi = flops/timestat.min;
 | 
				
			||||||
 | 
						mf_lo = flops/timestat.max;
 | 
				
			||||||
 | 
						mf_err= flops/timestat.min * timestat.err/timestat.mean;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mflops = flops/timestat.mean;
 | 
				
			||||||
 | 
						mflops_all.push_back(mflops);
 | 
				
			||||||
 | 
						if ( mflops_best == 0   ) mflops_best = mflops;
 | 
				
			||||||
 | 
						if ( mflops_worst== 0   ) mflops_worst= mflops;
 | 
				
			||||||
 | 
						if ( mflops>mflops_best ) mflops_best = mflops;
 | 
				
			||||||
 | 
						if ( mflops<mflops_worst) mflops_worst= mflops;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s =   "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s per rank   "<< mflops/NP<<std::endl;
 | 
				
			||||||
 | 
						std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s per node   "<< mflops/NN<<std::endl;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << L<<"^4  Deo Best  mflop/s        =   "<< mflops_best << " ; " << mflops_best/NN<<" per node " <<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << L<<"^4  Deo Worst mflop/s        =   "<< mflops_worst<< " ; " << mflops_worst/NN<<" per node " <<std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage <<fmt << std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int i=0;i<mflops_all.size();i++){
 | 
				
			||||||
 | 
						std::cout<<mflops_all[i]/NN<<" ; " ;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      std::cout<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    return mflops_best;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (GlobalSharedMemory::WorldRank==0) { 
 | 
				
			||||||
 | 
					    FP = fopen("Benchmark_usqcd.csv","w");
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    FP = fopen("/dev/null","w");
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
 | 
				
			||||||
 | 
					  LebesgueOrder::Block = std::vector<int>({2,2,2,2});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Benchmark::Decomposition();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int do_su4=0;
 | 
				
			||||||
 | 
					  int do_memory=1;
 | 
				
			||||||
 | 
					  int do_comms =1;
 | 
				
			||||||
 | 
					  int do_blas  =1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int sel=4;
 | 
				
			||||||
 | 
					  std::vector<int> L_list({8,12,16,24,32});
 | 
				
			||||||
 | 
					  int selm1=sel-1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<double> clover;
 | 
				
			||||||
 | 
					  std::vector<double> dwf4;
 | 
				
			||||||
 | 
					  std::vector<double> staggered;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Ls=1;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << " Clover dslash 4D vectorised (temporarily Wilson)" <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  for(int l=0;l<L_list.size();l++){
 | 
				
			||||||
 | 
					    clover.push_back(Benchmark::DWF(1,L_list[l]));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Ls=12;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << " Domain wall dslash 4D vectorised" <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  for(int l=0;l<L_list.size();l++){
 | 
				
			||||||
 | 
					    double result = Benchmark::DWF(Ls,L_list[l]) ;
 | 
				
			||||||
 | 
					    dwf4.push_back(result);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << " Improved Staggered dslash 4D vectorised" <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  for(int l=0;l<L_list.size();l++){
 | 
				
			||||||
 | 
					    double result = Benchmark::Staggered(L_list[l]) ;
 | 
				
			||||||
 | 
					    staggered.push_back(result);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "L \t\t Clover \t\t DWF4 \t\t Staggered" <<std::endl;
 | 
				
			||||||
 | 
					  for(int l=0;l<L_list.size();l++){
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< clover[l]<<" \t\t "<<dwf4[l] << " \t\t "<< staggered[l]<<std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int NN=NN_global;
 | 
				
			||||||
 | 
					  if ( do_memory ) {
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " Memory benchmark " <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    Benchmark::Memory();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( do_blas ) {
 | 
				
			||||||
 | 
					#if defined(GRID_CUDA) || defined(GRID_HIP)     || defined(GRID_SYCL)   
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " Batched BLAS benchmark " <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    Benchmark::BLAS();
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( do_su4 ) {
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " SU(4) benchmark " <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    Benchmark::SU4();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if ( do_comms ) {
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    Benchmark::Comms();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " Per Node Summary table Ls="<<Ls <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " L \t\t Clover\t\t DWF4\t\t Staggered (GF/s per node)" <<std::endl;
 | 
				
			||||||
 | 
					    fprintf(FP,"Per node summary table\n");
 | 
				
			||||||
 | 
					    fprintf(FP,"\n");
 | 
				
			||||||
 | 
					    fprintf(FP,"L , Wilson, DWF4, Staggered, GF/s per node\n");
 | 
				
			||||||
 | 
					    fprintf(FP,"\n");
 | 
				
			||||||
 | 
					    for(int l=0;l<L_list.size();l++){
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< clover[l]/NN<<" \t "<<dwf4[l]/NN<< " \t "<<staggered[l]/NN<<std::endl;
 | 
				
			||||||
 | 
					      fprintf(FP,"%d , %.0f, %.0f, %.0f\n",L_list[l],clover[l]/NN/1000.,dwf4[l]/NN/1000.,staggered[l]/NN/1000.);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(FP,"\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " Comparison point     result: "  << 0.5*(dwf4[sel]+dwf4[selm1])/NN << " Mflop/s per node"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << " Comparison point is 0.5*("<<dwf4[sel]/NN<<"+"<<dwf4[selm1]/NN << ") "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<std::setprecision(3);
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					  fclose(FP);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										26
									
								
								bootstrap.sh
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								bootstrap.sh
									
									
									
									
									
								
							@@ -2,26 +2,20 @@
 | 
				
			|||||||
set -e
 | 
					set -e
 | 
				
			||||||
 | 
					
 | 
				
			||||||
EIGEN_URL='https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2'
 | 
					EIGEN_URL='https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2'
 | 
				
			||||||
##EIGEN_URL='https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.bz2'
 | 
					EIGEN_SHA256SUM='b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626'
 | 
				
			||||||
EIGEN_SHA256SUM='685adf14bd8e9c015b78097c1dc22f2f01343756f196acdc76a678e1ae352e11'
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
echo "-- deploying Eigen source..."
 | 
					echo "-- deploying Eigen source..."
 | 
				
			||||||
ARC=`basename ${EIGEN_URL}`
 | 
					ARC=$(basename ${EIGEN_URL})
 | 
				
			||||||
wget ${EIGEN_URL} --no-check-certificate
 | 
					wget ${EIGEN_URL} --no-check-certificate
 | 
				
			||||||
#if command -v sha256sum; then
 | 
					if command -v sha256sum; then
 | 
				
			||||||
#   echo "$EIGEN_SHA256SUM  $(basename "$EIGEN_URL")" \
 | 
					   echo "$EIGEN_SHA256SUM  $(basename "$EIGEN_URL")" \
 | 
				
			||||||
#      | sha256sum --check || exit 1
 | 
					      | sha256sum --check || exit 1
 | 
				
			||||||
#else
 | 
					else
 | 
				
			||||||
#   echo "WARNING: could not verify checksum, please install sha256sum" >&2
 | 
					   echo "WARNING: could not verify checksum, please install sha256sum" >&2
 | 
				
			||||||
#fi
 | 
					fi
 | 
				
			||||||
./scripts/update_eigen.sh ${ARC}
 | 
					./scripts/update_eigen.sh "${ARC}"
 | 
				
			||||||
rm ${ARC}
 | 
					rm "${ARC}"
 | 
				
			||||||
# patch for non-portable includes in Eigen 3.3.5
 | 
					 | 
				
			||||||
# apparently already fixed in Eigen HEAD so it should not be 
 | 
					 | 
				
			||||||
# a problem in the future (A.P.)
 | 
					 | 
				
			||||||
# patch Eigen/unsupported/Eigen/CXX11/Tensor scripts/eigen-3.3.5.Tensor.patch
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
echo '-- generating Make.inc files...'
 | 
					echo '-- generating Make.inc files...'
 | 
				
			||||||
./scripts/filelist
 | 
					./scripts/filelist
 | 
				
			||||||
echo '-- generating configure script...'
 | 
					echo '-- generating configure script...'
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,8 +0,0 @@
 | 
				
			|||||||
../configure \
 | 
					 | 
				
			||||||
    --enable-comms=none \
 | 
					 | 
				
			||||||
    --enable-simd=GEN \
 | 
					 | 
				
			||||||
    --enable-gen-simd-width=16 \
 | 
					 | 
				
			||||||
    CXX=clang++ \
 | 
					 | 
				
			||||||
    LDFLAGS="-lcudart " \
 | 
					 | 
				
			||||||
    CXXFLAGS="-fopenmp -std=c++14 -fopenmp-cuda-mode  -O3 -target x86_64-pc-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -lcudart -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_70 -DOMPTARGET -DOMPTARGET_MANAGED"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
							
								
								
									
										32
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										32
									
								
								configure.ac
									
									
									
									
									
								
							@@ -226,34 +226,25 @@ case ${ac_SFW_FP16} in
 | 
				
			|||||||
esac
 | 
					esac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
############### Default to accelerator cshift, but revert to host if UCX is buggy or other reasons
 | 
					############### Default to accelerator cshift, but revert to host if UCX is buggy or other reasons
 | 
				
			||||||
AC_ARG_ENABLE([accelerator-cshift],
 | 
					AC_ARG_ENABLE([accelerator-aware-mpi],
 | 
				
			||||||
    [AS_HELP_STRING([--enable-accelerator-cshift=yes|no],[run cshift on the device])],
 | 
					    [AS_HELP_STRING([--enable-accelerator-aware-mpi=yes|no],[run mpi transfers from device])],
 | 
				
			||||||
    [ac_ACC_CSHIFT=${enable_accelerator_cshift}], [ac_ACC_CSHIFT=yes])
 | 
					    [ac_ACCELERATOR_AWARE_MPI=${enable_accelerator_aware_mpi}], [ac_ACCELERATOR_AWARE_MPI=yes])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
AC_ARG_ENABLE([ucx-buggy],
 | 
					case ${ac_ACCELERATOR_AWARE_MPI} in
 | 
				
			||||||
    [AS_HELP_STRING([--enable-ucx-buggy=yes|no],[enable workaround for UCX device buffer bugs])],
 | 
					 | 
				
			||||||
    [ac_UCXBUGGY=${enable_ucx_buggy}], [ac_UCXBUGGY=no])
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
case ${ac_UCXBUGGY} in
 | 
					 | 
				
			||||||
    yes)
 | 
					    yes)
 | 
				
			||||||
    ac_ACC_CSHIFT=no;;
 | 
					      AC_DEFINE([ACCELERATOR_CSHIFT],[1],[ Cshift runs on host])
 | 
				
			||||||
    *);;
 | 
					      AC_DEFINE([ACCELERATOR_AWARE_MPI],[1],[ Stencil can use device pointers]);;
 | 
				
			||||||
esac
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
case ${ac_ACC_CSHIFT} in
 | 
					 | 
				
			||||||
    yes)
 | 
					 | 
				
			||||||
      AC_DEFINE([ACCELERATOR_CSHIFT],[1],[ UCX device buffer bugs are not present]);;
 | 
					 | 
				
			||||||
    *);;
 | 
					    *);;
 | 
				
			||||||
esac
 | 
					esac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
############### SYCL/CUDA/HIP/OpenMP/none
 | 
					############### SYCL/CUDA/HIP/none
 | 
				
			||||||
AC_ARG_ENABLE([accelerator],
 | 
					AC_ARG_ENABLE([accelerator],
 | 
				
			||||||
    [AS_HELP_STRING([--enable-accelerator=cuda|sycl|hip|openmp|none],[enable none,openmp,cuda,sycl,hip acceleration])],
 | 
					    [AS_HELP_STRING([--enable-accelerator=cuda|sycl|hip|none],[enable none,cuda,sycl,hip acceleration])],
 | 
				
			||||||
    [ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none])
 | 
					    [ac_ACCELERATOR=${enable_accelerator}], [ac_ACCELERATOR=none])
 | 
				
			||||||
case ${ac_ACCELERATOR} in
 | 
					case ${ac_ACCELERATOR} in
 | 
				
			||||||
    cuda)
 | 
					    cuda)
 | 
				
			||||||
      echo CUDA acceleration ${ac_ACCELERATOR} ${enable_accelerator}
 | 
					      echo CUDA acceleration
 | 
				
			||||||
      LIBS="${LIBS} -lcuda"
 | 
					      LIBS="${LIBS} -lcuda"
 | 
				
			||||||
      AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
 | 
					      AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
 | 
				
			||||||
    sycl)
 | 
					    sycl)
 | 
				
			||||||
@@ -262,15 +253,12 @@ case ${ac_ACCELERATOR} in
 | 
				
			|||||||
    hip)
 | 
					    hip)
 | 
				
			||||||
      echo HIP acceleration
 | 
					      echo HIP acceleration
 | 
				
			||||||
      AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);;
 | 
					      AC_DEFINE([GRID_HIP],[1],[Use HIP offload]);;
 | 
				
			||||||
    openmp)
 | 
					 | 
				
			||||||
      echo OMPTARGET acceleration
 | 
					 | 
				
			||||||
      AC_DEFINE([GRID_OMPTARGET],[1],[Use OMPTARGET offload]);;
 | 
					 | 
				
			||||||
    none)
 | 
					    none)
 | 
				
			||||||
      echo NO acceleration    ;;
 | 
					      echo NO acceleration    ;;
 | 
				
			||||||
    no)
 | 
					    no)
 | 
				
			||||||
      echo NO acceleration    ;;
 | 
					      echo NO acceleration    ;;
 | 
				
			||||||
    *)
 | 
					    *)
 | 
				
			||||||
      AC_MSG_ERROR(["1Acceleration not suppoorted ${ac_ACCELERATOR}"]);;
 | 
					      AC_MSG_ERROR(["Acceleration not suppoorted ${ac_ACCELERATOR}"]);;
 | 
				
			||||||
esac
 | 
					esac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
############### UNIFIED MEMORY
 | 
					############### UNIFIED MEMORY
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										183
									
								
								examples/Example_plaquette.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										183
									
								
								examples/Example_plaquette.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,183 @@
 | 
				
			|||||||
 | 
					/* 
 | 
				
			||||||
 | 
					 * Example_plaquette.cc                                                               
 | 
				
			||||||
 | 
					 * 
 | 
				
			||||||
 | 
					 * D. Clarke 
 | 
				
			||||||
 | 
					 * 
 | 
				
			||||||
 | 
					 * Here I just want to create an incredibly simple main to get started with GRID and get used
 | 
				
			||||||
 | 
					 * to its syntax. If the reader is like me, they vaguely understand something about lattice coding,
 | 
				
			||||||
 | 
					 * they don't know a ton of C++, don't know much of the fine details, and certainly know nothing about GRID.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Once you've made a new executable, like this one, you can bootstrap.sh again. At this point,
 | 
				
			||||||
 | 
					 * the code should be able to find your new executable. You can tell that bootstrap.sh worked by
 | 
				
			||||||
 | 
					 * having a look at Make.inc. You should see your executable inside there.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Warning: This code is illustrative only, not well tested, and not meant for production use. The best
 | 
				
			||||||
 | 
					 * way to read this code is to start at the main.
 | 
				
			||||||
 | 
					 * 
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// All your mains should have this
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This copies what already exists in WilsonLoops.h. The point here is to be pedagogical and explain in
 | 
				
			||||||
 | 
					// detail what everything does so we can see how GRID works.
 | 
				
			||||||
 | 
					template <class Gimpl> class WLoops : public Gimpl {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    // Gimpl seems to be an arbitrary class. Within this class, it is expected that certain types are
 | 
				
			||||||
 | 
					    // already defined, things like Scalar and Field. This macro includes a bunch of #typedefs that
 | 
				
			||||||
 | 
					    // implement this equivalence at compile time.
 | 
				
			||||||
 | 
					    INHERIT_GIMPL_TYPES(Gimpl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Some example Gimpls can be found in GaugeImplementations.h, at the bottom. These are in turn built
 | 
				
			||||||
 | 
					    // out of GaugeImplTypes, which can be found in GaugeImplTypes.h. The GaugeImplTypes contain the base
 | 
				
			||||||
 | 
					    // field/vector/link/whatever types. These inherit from iScalar, iVector, and iMatrix objects, which
 | 
				
			||||||
 | 
					    // are sort of the building blocks for general math objects. The "i" at the beginning of these names
 | 
				
			||||||
 | 
					    // indicates that they should be for internal use only. It seems like these base types have the
 | 
				
			||||||
 | 
					    // acceleration, e.g. SIMD or GPU or what-have-you, abstracted away. How you accelerate these things
 | 
				
			||||||
 | 
					    // appears to be controlled through a template parameter called vtype.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // The general math/physics objects, such as a color matrix, are built up by nesting these objects.
 | 
				
			||||||
 | 
					    // For instance a general color matrix has two color indices, so it's built up like
 | 
				
			||||||
 | 
					    //     iScalar<iScalar<iMatrix<vtype ...
 | 
				
			||||||
 | 
					    // where the levels going from the inside out are color, spin, then Lorentz indices. Scalars have
 | 
				
			||||||
 | 
					    // no indices, so it's what we use when such an index isn't needed. Lattice objects are made by one
 | 
				
			||||||
 | 
					    // higher level of indexing using iVector.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // These types will be used for U and U_mu objects, respectively.
 | 
				
			||||||
 | 
					    typedef typename Gimpl::GaugeLinkField GaugeMat;
 | 
				
			||||||
 | 
					    typedef typename Gimpl::GaugeField GaugeLorentz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // U_mu_nu(x)
 | 
				
			||||||
 | 
					    static void dirPlaquette(GaugeMat &plaq, const std::vector<GaugeMat> &U, const int mu, const int nu) {
 | 
				
			||||||
 | 
					        // Calls like CovShiftForward and CovShiftBackward have 3 arguments, and they multiply together
 | 
				
			||||||
 | 
					        // the first and last argument. (Second arg gives the shift direction.) The CovShiftIdentityBackward
 | 
				
			||||||
 | 
					        // has meanwhile only two arguments; it just returns the shifted (adjoint since backward) link. 
 | 
				
			||||||
 | 
					        plaq = Gimpl::CovShiftForward(U[mu],mu,
 | 
				
			||||||
 | 
					                   // Means Link*Cshift(field,mu,1), arguments are Link, mu, field in that order.
 | 
				
			||||||
 | 
					                   Gimpl::CovShiftForward(U[nu],nu,
 | 
				
			||||||
 | 
					                       Gimpl::CovShiftBackward(U[mu],mu,
 | 
				
			||||||
 | 
					                           // This means Cshift(adj(Link), mu, -1)
 | 
				
			||||||
 | 
					                           Gimpl::CovShiftIdentityBackward(U[nu], nu))));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // tr U_mu_nu(x)
 | 
				
			||||||
 | 
					    static void traceDirPlaquette(ComplexField &plaq, const std::vector<GaugeMat> &U, const int mu, const int nu) {
 | 
				
			||||||
 | 
					        // This .Grid() syntax seems to get the pointer to the GridBase. Apparently this is needed as argument
 | 
				
			||||||
 | 
					        // to instantiate a Lattice object.
 | 
				
			||||||
 | 
					        GaugeMat sp(U[0].Grid());
 | 
				
			||||||
 | 
					        dirPlaquette(sp, U, mu, nu);
 | 
				
			||||||
 | 
					        plaq = trace(sp);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // sum_mu_nu tr U_mu_nu(x)
 | 
				
			||||||
 | 
					    static void sitePlaquette(ComplexField &Plaq, const std::vector<GaugeMat> &U) {
 | 
				
			||||||
 | 
					        ComplexField sitePlaq(U[0].Grid());
 | 
				
			||||||
 | 
					        Plaq = Zero();
 | 
				
			||||||
 | 
					        // Nd=4 and Nc=3 are set as global constants in QCD.h
 | 
				
			||||||
 | 
					        for (int mu = 1; mu < Nd; mu++) {
 | 
				
			||||||
 | 
					            for (int nu = 0; nu < mu; nu++) {
 | 
				
			||||||
 | 
					                traceDirPlaquette(sitePlaq, U, mu, nu);
 | 
				
			||||||
 | 
					                Plaq = Plaq + sitePlaq;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // sum_mu_nu_x Re tr U_mu_nu(x)
 | 
				
			||||||
 | 
					    static RealD sumPlaquette(const GaugeLorentz &Umu) {
 | 
				
			||||||
 | 
					        std::vector<GaugeMat> U(Nd, Umu.Grid());
 | 
				
			||||||
 | 
					        for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
					            // Umu is a GaugeLorentz object, and as such has a non-trivial Lorentz index. We can
 | 
				
			||||||
 | 
					            // access the element in the mu Lorentz index with this PeekIndex syntax.
 | 
				
			||||||
 | 
					            U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        ComplexField Plaq(Umu.Grid());
 | 
				
			||||||
 | 
					        sitePlaquette(Plaq, U);
 | 
				
			||||||
 | 
					        // I guess this should be the line that sums over all space-time sites.
 | 
				
			||||||
 | 
					        auto Tp = sum(Plaq);
 | 
				
			||||||
 | 
					        // Until now, we have been working with objects inside the tensor nest. This TensorRemove gets
 | 
				
			||||||
 | 
					        // rid of the tensor nest to return whatever is inside.
 | 
				
			||||||
 | 
					        auto p  = TensorRemove(Tp);
 | 
				
			||||||
 | 
					        return p.real();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // < Re tr U_mu_nu(x) >
 | 
				
			||||||
 | 
					    static RealD avgPlaquette(const GaugeLorentz &Umu) {
 | 
				
			||||||
 | 
					        // Real double type
 | 
				
			||||||
 | 
					        RealD sumplaq = sumPlaquette(Umu);
 | 
				
			||||||
 | 
					        // gSites() is the number of global sites. There is also lSites() for local sites.
 | 
				
			||||||
 | 
					        double vol = Umu.Grid()->gSites();
 | 
				
			||||||
 | 
					        // The number of orientations. 4*3/2=6 for Nd=4, as known.
 | 
				
			||||||
 | 
					        double faces = (1.0 * Nd * (Nd - 1)) / 2.0;
 | 
				
			||||||
 | 
					        return sumplaq / vol / faces / Nc;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Next we show an example of how to construct an input parameter class. We first inherit
 | 
				
			||||||
 | 
					// from Serializable. Then all class data members have to be defined using the
 | 
				
			||||||
 | 
					// GRID_SERIALIZABLE_CLASS_MEMBERS macro. This variadic macro allows for arbitrarily many
 | 
				
			||||||
 | 
					// class data members. In the below case, we make a parameter file holding the configuration
 | 
				
			||||||
 | 
					// name. Here, it expects the name to be labeled with "conf_name" in the configuration file. 
 | 
				
			||||||
 | 
					struct ConfParameters: Serializable {
 | 
				
			||||||
 | 
					    GRID_SERIALIZABLE_CLASS_MEMBERS(
 | 
				
			||||||
 | 
					        ConfParameters,
 | 
				
			||||||
 | 
					        std::string, conf_name);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    template <class ReaderClass>
 | 
				
			||||||
 | 
					    ConfParameters(Reader<ReaderClass>& Reader){
 | 
				
			||||||
 | 
					        // If we are reading an XML file, it should be structured like:
 | 
				
			||||||
 | 
					        // <grid>
 | 
				
			||||||
 | 
					        //   <parameters>
 | 
				
			||||||
 | 
					        //     <conf_name>l20t20b06498a_nersc.302500</conf_name>
 | 
				
			||||||
 | 
					        //   </parameters>
 | 
				
			||||||
 | 
					        // </grid>
 | 
				
			||||||
 | 
					        read(Reader, "parameters", *this);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This syntax lets you pass command line arguments to main. An asterisk means that what follows is
 | 
				
			||||||
 | 
					// a pointer. Two asterisks mean that what follows is a pointer to a pointer (here, the array of argument strings).
 | 
				
			||||||
 | 
					int main (int argc, char **argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    // This initializes Grid. Some command line options include
 | 
				
			||||||
 | 
					    //   --mpi n.n.n.n
 | 
				
			||||||
 | 
					    //   --threads n
 | 
				
			||||||
 | 
					    //   --grid n.n.n.n
 | 
				
			||||||
 | 
					    Grid_init(&argc, &argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // This is where you would specify a custom lattice size, if not from the command line. Here
 | 
				
			||||||
 | 
					    // Nd is a global quantity that is currently set to 4.
 | 
				
			||||||
 | 
					    Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
				
			||||||
 | 
					    Coordinate mpi_layout  = GridDefaultMpi();
 | 
				
			||||||
 | 
					    Coordinate latt_size   = GridDefaultLatt();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Instantiate the spacetime Grid on which everything will be built.
 | 
				
			||||||
 | 
					    GridCartesian GRID(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // The PeriodicGimplD type is what you want for gauge matrices. There is also a LatticeGaugeFieldD
 | 
				
			||||||
 | 
					    // type that you can use, which will work perfectly with what follows. 
 | 
				
			||||||
 | 
					    PeriodicGimplD::Field U(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Here we read in the parameter file Example_plaquette.xml to get conf_name. The last argument is what the
 | 
				
			||||||
 | 
					    // top organizational level is called in the param file. 
 | 
				
			||||||
 | 
					    XmlReader Reader("Example_plaquette.xml",false, "grid");
 | 
				
			||||||
 | 
					    ConfParameters param(Reader);  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Load a lattice from SIMULATeQCD into U. SIMULATeQCD finds plaquette = 0.6381995717
 | 
				
			||||||
 | 
					    FieldMetaData header;
 | 
				
			||||||
 | 
					    NerscIO::readConfiguration(U, header, param.conf_name);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Let's see what we find.
 | 
				
			||||||
 | 
					    RealD plaq = WLoops<PeriodicGimplD>::avgPlaquette(U);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // This is how you make log messages.
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1) << "Plaquette = " << plaq << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // To wrap things up.
 | 
				
			||||||
 | 
					    Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -1,6 +0,0 @@
 | 
				
			|||||||
#!/bin/sh -f
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module load cgpu 
 | 
					 | 
				
			||||||
module load gcc/8.3.0
 | 
					 | 
				
			||||||
module load cuda/11.0.3
 | 
					 | 
				
			||||||
module load llvm/14.0.0-git_20211104b
 | 
					 | 
				
			||||||
@@ -1,19 +0,0 @@
 | 
				
			|||||||
--- ./Eigen/unsupported/Eigen/CXX11/Tensor	2018-07-23 10:33:42.000000000 +0100
 | 
					 | 
				
			||||||
+++ Tensor	2018-08-28 16:15:56.000000000 +0100
 | 
					 | 
				
			||||||
@@ -25,7 +25,7 @@
 | 
					 | 
				
			||||||
 #include <utility>
 | 
					 | 
				
			||||||
 #endif
 | 
					 | 
				
			||||||
 
 | 
					 | 
				
			||||||
-#include <Eigen/src/Core/util/DisableStupidWarnings.h>
 | 
					 | 
				
			||||||
+#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
 | 
					 | 
				
			||||||
 
 | 
					 | 
				
			||||||
 #include "../SpecialFunctions"
 | 
					 | 
				
			||||||
 #include "src/util/CXX11Meta.h"
 | 
					 | 
				
			||||||
@@ -147,6 +147,6 @@
 | 
					 | 
				
			||||||
 
 | 
					 | 
				
			||||||
 #include "src/Tensor/TensorIO.h"
 | 
					 | 
				
			||||||
 
 | 
					 | 
				
			||||||
-#include <Eigen/src/Core/util/ReenableStupidWarnings.h>
 | 
					 | 
				
			||||||
+#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
 | 
					 | 
				
			||||||
 
 | 
					 | 
				
			||||||
 //#endif // EIGEN_CXX11_TENSOR_MODULE
 | 
					 | 
				
			||||||
@@ -25,12 +25,16 @@ export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
				
			||||||
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
				
			||||||
export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
					#export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
				
			||||||
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
				
			||||||
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
				
			||||||
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
				
			||||||
export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
				
			||||||
export MPICH_OFI_NIC_POLICY=GPU
 | 
					export MPICH_OFI_NIC_POLICY=GPU
 | 
				
			||||||
 | 
					export FI_CXI_CQ_FILL_PERCENT=10
 | 
				
			||||||
 | 
					export FI_CXI_DEFAULT_CQ_SIZE=262144
 | 
				
			||||||
 | 
					#export FI_CXI_DEFAULT_CQ_SIZE=131072
 | 
				
			||||||
 | 
					#export FI_CXI_CQ_FILL_PERCENT=20
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# 12 ppn, 32 nodes, 384 ranks
 | 
					# 12 ppn, 32 nodes, 384 ranks
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
@@ -45,12 +49,12 @@ CMD="mpiexec -np 12288 -ppn 12  -envall \
 | 
				
			|||||||
	     ./gpu_tile_compact.sh \
 | 
						     ./gpu_tile_compact.sh \
 | 
				
			||||||
	     ./Benchmark_dwf_fp32 --mpi 8.8.8.24 --grid 128.128.128.384 \
 | 
						     ./Benchmark_dwf_fp32 --mpi 8.8.8.24 --grid 128.128.128.384 \
 | 
				
			||||||
		--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap"
 | 
							--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap"
 | 
				
			||||||
$CMD | tee 1024node.dwf.small
 | 
					$CMD | tee 1024node.dwf.small.cq
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CMD="mpiexec -np 12288 -ppn 12  -envall \
 | 
					CMD="mpiexec -np 12288 -ppn 12  -envall \
 | 
				
			||||||
	     ./gpu_tile_compact.sh \
 | 
						     ./gpu_tile_compact.sh \
 | 
				
			||||||
	     ./Benchmark_dwf_fp32 --mpi 16.8.8.12 --grid 256.256.256.384 \
 | 
						     ./Benchmark_dwf_fp32 --mpi 16.8.8.12 --grid 256.256.256.384 \
 | 
				
			||||||
		--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap"
 | 
							--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32 --comms-overlap"
 | 
				
			||||||
$CMD | tee 1024node.dwf
 | 
					$CMD | tee 1024node.dwf.cq
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -17,6 +17,7 @@ source ../sourceme.sh
 | 
				
			|||||||
export OMP_NUM_THREADS=3
 | 
					export OMP_NUM_THREADS=3
 | 
				
			||||||
export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
 | 
					#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
 | 
				
			||||||
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
 | 
					#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
 | 
				
			||||||
#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
 | 
					#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
 | 
				
			||||||
@@ -35,11 +36,25 @@ CMD="mpiexec -np 24 -ppn 12  -envall \
 | 
				
			|||||||
	     ./Benchmark_comms_host_device --mpi 2.3.2.2 --grid 32.24.32.192 \
 | 
						     ./Benchmark_comms_host_device --mpi 2.3.2.2 --grid 32.24.32.192 \
 | 
				
			||||||
		--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
 | 
							--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
$CMD 
 | 
					#$CMD 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CMD="mpiexec -np 24 -ppn 12  -envall \
 | 
					CMD="mpiexec -np 24 -ppn 12  -envall \
 | 
				
			||||||
	     ./gpu_tile_compact.sh \
 | 
						     ./gpu_tile_compact.sh \
 | 
				
			||||||
	     ./Benchmark_dwf_fp32 --mpi 2.3.2.2 --grid 64.96.64.64 --comms-overlap \
 | 
						     ./Benchmark_dwf_fp32 --mpi 2.3.2.2 --grid 64.96.64.64 --comms-overlap \
 | 
				
			||||||
		--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
 | 
							--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#$CMD 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CMD="mpiexec -np 1 -ppn 1  -envall \
 | 
				
			||||||
 | 
						     ./gpu_tile_compact.sh \
 | 
				
			||||||
 | 
						     ./Benchmark_dwf --mpi 1.1.1.1 --grid 16.32.32.32 --comms-sequential \
 | 
				
			||||||
 | 
							--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					$CMD 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CMD="mpiexec -np 1 -ppn 1  -envall \
 | 
				
			||||||
 | 
						     ./gpu_tile_compact.sh \
 | 
				
			||||||
 | 
						     ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.32.32.32 --comms-sequential \
 | 
				
			||||||
 | 
							--shm-mpi 1 --shm 2048 --device-mem 32000 --accelerator-threads 32"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
$CMD 
 | 
					$CMD 
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,16 +1,16 @@
 | 
				
			|||||||
TOOLS=$HOME/tools
 | 
					
 | 
				
			||||||
../../configure \
 | 
					../../configure \
 | 
				
			||||||
	--enable-simd=GPU \
 | 
						--enable-simd=GPU \
 | 
				
			||||||
	--enable-gen-simd-width=64 \
 | 
						--enable-gen-simd-width=64 \
 | 
				
			||||||
	--enable-comms=mpi-auto \
 | 
						--enable-comms=mpi-auto \
 | 
				
			||||||
	--enable-accelerator-cshift \
 | 
					 | 
				
			||||||
	--disable-gparity \
 | 
						--disable-gparity \
 | 
				
			||||||
	--disable-fermion-reps \
 | 
						--disable-fermion-reps \
 | 
				
			||||||
	--enable-shm=nvlink \
 | 
						--enable-shm=nvlink \
 | 
				
			||||||
	--enable-accelerator=sycl \
 | 
						--enable-accelerator=sycl \
 | 
				
			||||||
 | 
						--enable-accelerator-aware-mpi=no\
 | 
				
			||||||
	--enable-unified=no \
 | 
						--enable-unified=no \
 | 
				
			||||||
	MPICXX=mpicxx \
 | 
						MPICXX=mpicxx \
 | 
				
			||||||
	CXX=icpx \
 | 
						CXX=icpx \
 | 
				
			||||||
	LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$TOOLS/lib64/" \
 | 
						LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -lsycl" \
 | 
				
			||||||
	CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include"
 | 
						CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -qmkl=parallel"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										2
									
								
								systems/Aurora/sourceme-sunspot-deterministic.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								systems/Aurora/sourceme-sunspot-deterministic.sh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,2 @@
 | 
				
			|||||||
 | 
					module load oneapi/eng-compiler/2023.05.15.003
 | 
				
			||||||
 | 
					module load mpich/51.2/icc-all-deterministic-pmix-gpu
 | 
				
			||||||
@@ -3,6 +3,19 @@
 | 
				
			|||||||
module use /soft/modulefiles
 | 
					module use /soft/modulefiles
 | 
				
			||||||
module load intel_compute_runtime/release/agama-devel-682.22
 | 
					module load intel_compute_runtime/release/agama-devel-682.22
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export FI_CXI_DEFAULT_CQ_SIZE=131072
 | 
				
			||||||
 | 
					export FI_CXI_CQ_FILL_PERCENT=20
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
 | 
				
			||||||
 | 
					#export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-intel-enable-auto-large-GRF-mode"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# -ftarget-register-alloc-mode=pvc:default 
 | 
				
			||||||
 | 
					# -ftarget-register-alloc-mode=pvc:small
 | 
				
			||||||
 | 
					# -ftarget-register-alloc-mode=pvc:large
 | 
				
			||||||
 | 
					# -ftarget-register-alloc-mode=pvc:auto
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					
 | 
				
			||||||
export HTTP_PROXY=http://proxy.alcf.anl.gov:3128
 | 
					export HTTP_PROXY=http://proxy.alcf.anl.gov:3128
 | 
				
			||||||
export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128
 | 
					export HTTPS_PROXY=http://proxy.alcf.anl.gov:3128
 | 
				
			||||||
export http_proxy=http://proxy.alcf.anl.gov:3128
 | 
					export http_proxy=http://proxy.alcf.anl.gov:3128
 | 
				
			||||||
@@ -10,3 +23,4 @@ export https_proxy=http://proxy.alcf.anl.gov:3128
 | 
				
			|||||||
#export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
 | 
					#export MPIR_CVAR_CH4_OFI_ENABLE_HMEM=1
 | 
				
			||||||
git config --global http.proxy http://proxy.alcf.anl.gov:3128
 | 
					git config --global http.proxy http://proxy.alcf.anl.gov:3128
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										41
									
								
								systems/Aurora/tests/repro128.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								systems/Aurora/tests/repro128.pbs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,41 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## qsub -q EarlyAppAccess -A Aurora_Deployment -I -l select=1 -l walltime=60:00
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#PBS -q EarlyAppAccess
 | 
				
			||||||
 | 
					#PBS -l select=128
 | 
				
			||||||
 | 
					#PBS -l walltime=02:00:00
 | 
				
			||||||
 | 
					#PBS -A LatticeQCD_aesp_CNDA
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export OMP_PROC_BIND=spread
 | 
				
			||||||
 | 
					#unset OMP_PLACES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					source ../sourceme.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cat $PBS_NODEFILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=3
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
				
			||||||
 | 
					export MPICH_OFI_NIC_POLICY=GPU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 12 ppn, 16 nodes, 192 ranks
 | 
				
			||||||
 | 
					# 12 ppn, 128 nodes, 1536 ranks
 | 
				
			||||||
 | 
					CMD="mpiexec -np 1536 -ppn 12  -envall \
 | 
				
			||||||
 | 
						     ./gpu_tile_compact.sh \
 | 
				
			||||||
 | 
						     ./Test_dwf_mixedcg_prec --mpi 4.4.4.24 --grid 128.128.128.384 \
 | 
				
			||||||
 | 
							--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 7000 --comms-overlap "
 | 
				
			||||||
 | 
					$CMD 
 | 
				
			||||||
							
								
								
									
										61
									
								
								systems/Aurora/tests/repro16.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								systems/Aurora/tests/repro16.pbs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,61 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## qsub -q EarlyAppAccess -A Aurora_Deployment -I -l select=1 -l walltime=60:00
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#PBS -l select=16:system=sunspot,place=scatter
 | 
				
			||||||
 | 
					#PBS -A LatticeQCD_aesp_CNDA
 | 
				
			||||||
 | 
					#PBS -l walltime=01:00:00
 | 
				
			||||||
 | 
					#PBS -N dwf
 | 
				
			||||||
 | 
					#PBS -k doe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export OMP_PROC_BIND=spread
 | 
				
			||||||
 | 
					#unset OMP_PLACES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#source ../sourceme.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cat $PBS_NODEFILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export MPICH_COLL_SYNC=1
 | 
				
			||||||
 | 
					#export MPICH_ENV_DISPLAY=1
 | 
				
			||||||
 | 
					export MPICH_
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=3
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					module load oneapi/eng-compiler/2023.05.15.003
 | 
				
			||||||
 | 
					module load mpich/51.2/icc-all-deterministic-pmix-gpu
 | 
				
			||||||
 | 
					#export LD_LIBRARY_PATH=/soft/restricted/CNDA/updates/2023.05.15.001/oneapi/compiler/eng-20230512/compiler/linux/lib/:$LD_LIBRARY_PATH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
				
			||||||
 | 
					export MPICH_OFI_NIC_POLICY=GPU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DIR=repro.$PBS_JOBID
 | 
				
			||||||
 | 
					mkdir $DIR
 | 
				
			||||||
 | 
					cd $DIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CMD="mpiexec -np 192 -ppn 12  -envall \
 | 
				
			||||||
 | 
						     ../gpu_tile_compact.sh \
 | 
				
			||||||
 | 
						     ../Test_dwf_mixedcg_prec --mpi 2.4.4.6 --grid 64.128.128.192 \
 | 
				
			||||||
 | 
							--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000 --debug-stdout --log Message,Iterative"
 | 
				
			||||||
 | 
					#--comms-overlap
 | 
				
			||||||
 | 
					$CMD 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					grep Oops Grid.stderr.* > failures.$PBS_JOBID
 | 
				
			||||||
 | 
					rm core.*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										82
									
								
								systems/Aurora/tests/repro1gpu.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								systems/Aurora/tests/repro1gpu.pbs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,82 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#PBS -l select=16:system=sunspot,place=scatter
 | 
				
			||||||
 | 
					#PBS -A LatticeQCD_aesp_CNDA
 | 
				
			||||||
 | 
					#PBS -l walltime=02:00:00
 | 
				
			||||||
 | 
					#PBS -N repro1gpu
 | 
				
			||||||
 | 
					#PBS -k doe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export OMP_PROC_BIND=spread
 | 
				
			||||||
 | 
					#unset OMP_PLACES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					module load oneapi/eng-compiler/2023.05.15.003
 | 
				
			||||||
 | 
					module load mpich/51.2/icc-all-deterministic-pmix-gpu
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 56 cores / 6 threads ~9
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=6
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
				
			||||||
 | 
					export MPICH_OFI_NIC_POLICY=GPU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NN=`cat $PBS_NODEFILE | wc -l`
 | 
				
			||||||
 | 
					echo $PBS_NODEFILE
 | 
				
			||||||
 | 
					cat $PBS_NODEFILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $NN nodes in node file
 | 
				
			||||||
 | 
					for n in `eval echo {1..$NN}`
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 `
 | 
				
			||||||
 | 
					echo Node $n is $THIS_NODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for g in {0..11}
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					export NUMA_MAP=(0 0 0 1 1 1 0 0 0 1 1 1 )
 | 
				
			||||||
 | 
					export TILE_MAP=(0 0 0 0 0 0 1 1 1 1 1 1 )
 | 
				
			||||||
 | 
					export  GPU_MAP=(0 1 2 3 4 5 0 1 2 3 4 5 )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export numa=${NUMA_MAP[$g]}
 | 
				
			||||||
 | 
					export gpu_id=${GPU_MAP[$g]}
 | 
				
			||||||
 | 
					export tile_id=${TILE_MAP[$g]}
 | 
				
			||||||
 | 
					export gpu=$gpu_id.$tile_id
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DIR=repro.1gpu.$PBS_JOBID/node-$n-$THIS_NODE-GPU-$gpu
 | 
				
			||||||
 | 
					mkdir -p $DIR
 | 
				
			||||||
 | 
					cd $DIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $THIS_NODE > nodefile
 | 
				
			||||||
 | 
					echo $gpu > gpu
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export ZE_AFFINITY_MASK=$gpu
 | 
				
			||||||
 | 
					export ONEAPI_DEVICE_FILTER=gpu,level_zero
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CMD="mpiexec -np 1 -ppn 1  -envall --hostfile nodefile \
 | 
				
			||||||
 | 
						     numactl -N $numa -m $numa ../../Test_dwf_mixedcg_prec --mpi 1.1.1.1 --grid 16.16.32.32 \
 | 
				
			||||||
 | 
							--shm-mpi 0 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message"
 | 
				
			||||||
 | 
					echo $CMD
 | 
				
			||||||
 | 
					$CMD &
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					wait
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										98
									
								
								systems/Aurora/tests/reproN.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								systems/Aurora/tests/reproN.pbs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,98 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#PBS -l select=32:system=sunspot,place=scatter
 | 
				
			||||||
 | 
					#PBS -A LatticeQCD_aesp_CNDA
 | 
				
			||||||
 | 
					#PBS -l walltime=02:00:00
 | 
				
			||||||
 | 
					#PBS -N reproN
 | 
				
			||||||
 | 
					#PBS -k doe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export OMP_PROC_BIND=spread
 | 
				
			||||||
 | 
					#unset OMP_PLACES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					module load oneapi/eng-compiler/2023.05.15.003
 | 
				
			||||||
 | 
					module load mpich/51.2/icc-all-deterministic-pmix-gpu
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 56 cores / 6 threads ~9
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=6
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
				
			||||||
 | 
					#export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=1
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
 | 
				
			||||||
 | 
					export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export GRID_PRINT_ENTIRE_LOG=0
 | 
				
			||||||
 | 
					export GRID_CHECKSUM_RECV_BUF=0
 | 
				
			||||||
 | 
					export GRID_CHECKSUM_SEND_BUF=0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPICH_OFI_NIC_POLICY=GPU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NN=`cat $PBS_NODEFILE | wc -l`
 | 
				
			||||||
 | 
					echo $PBS_NODEFILE
 | 
				
			||||||
 | 
					cat $PBS_NODEFILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $NN nodes in node file
 | 
				
			||||||
 | 
					for n in `eval echo {1..$NN}`
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 `
 | 
				
			||||||
 | 
					echo Node $n is $THIS_NODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DIR=reproN.$PBS_JOBID/node-$n-$THIS_NODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mkdir -p $DIR
 | 
				
			||||||
 | 
					cd $DIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $THIS_NODE > nodefile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#CMD="mpiexec -np 12 -ppn 12  -envall --hostfile nodefile \
 | 
				
			||||||
 | 
					#	     ../../gpu_tile_compact.sh \
 | 
				
			||||||
 | 
					#	     ../../Test_dwf_mixedcg_prec --mpi 1.2.2.3 --grid 32.64.64.96 \
 | 
				
			||||||
 | 
					#		--shm-mpi 0 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message --comms-overlap"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CMD="mpiexec -np 12 -ppn 12  -envall --hostfile nodefile \
 | 
				
			||||||
 | 
						     ../../gpu_tile_compact.sh \
 | 
				
			||||||
 | 
						     ../../Test_dwf_mixedcg_prec --mpi 1.2.2.3 --grid 32.64.64.96 \
 | 
				
			||||||
 | 
							--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message --comms-overlap"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $CMD > command-line
 | 
				
			||||||
 | 
					env > environment
 | 
				
			||||||
 | 
					$CMD &
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Suspicion: 'wait' is allowing jobs to collide and knock each other out, so it is disabled in favour of a fixed sleep
 | 
				
			||||||
 | 
					#wait
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					sleep 6500
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for n in ` eval echo {1..$NN} `
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 `
 | 
				
			||||||
 | 
					DIR=reproN.$PBS_JOBID/node-$n-$THIS_NODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $DIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					grep Oops Grid.stderr.* > failures.$PBS_JOBID
 | 
				
			||||||
 | 
					rm core.*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
							
								
								
									
										40
									
								
								systems/Aurora/tests/solver/stag16.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								systems/Aurora/tests/solver/stag16.pbs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,40 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## qsub -q EarlyAppAccess -A Aurora_Deployment -I -l select=1 -l walltime=60:00
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#PBS -q EarlyAppAccess
 | 
				
			||||||
 | 
					#PBS -l select=16
 | 
				
			||||||
 | 
					#PBS -l walltime=01:00:00
 | 
				
			||||||
 | 
					#PBS -A LatticeQCD_aesp_CNDA
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export OMP_PROC_BIND=spread
 | 
				
			||||||
 | 
					#unset OMP_PLACES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					source ../../sourceme.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cat $PBS_NODEFILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=3
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
 | 
				
			||||||
 | 
					#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
				
			||||||
 | 
					export MPICH_OFI_NIC_POLICY=GPU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 12 ppn, 16 nodes, 192 ranks
 | 
				
			||||||
 | 
					CMD="mpiexec -np 192 -ppn 12  -envall \
 | 
				
			||||||
 | 
						     ./gpu_tile_compact.sh \
 | 
				
			||||||
 | 
						     ./Test_staggered_cg_prec --mpi 2.4.4.6 --grid 128.128.128.192 \
 | 
				
			||||||
 | 
						     --shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 3000 --comms-overlap"
 | 
				
			||||||
 | 
					$CMD 
 | 
				
			||||||
							
								
								
									
										70
									
								
								systems/Booster/benchmarks/Benchmark_usqcd.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								systems/Booster/benchmarks/Benchmark_usqcd.csv
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,70 @@
 | 
				
			|||||||
 | 
					Memory Bandwidth
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Bytes, GB/s per node
 | 
				
			||||||
 | 
					3145728, 225.900365
 | 
				
			||||||
 | 
					50331648, 2858.859504
 | 
				
			||||||
 | 
					254803968, 4145.556367
 | 
				
			||||||
 | 
					805306368, 4905.772480
 | 
				
			||||||
 | 
					1966080000, 4978.312557
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					GEMM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 M, N, K, BATCH, GF/s per rank
 | 
				
			||||||
 | 
					16, 8, 16, 256, 1.713639
 | 
				
			||||||
 | 
					16, 16, 16, 256, 288.268316
 | 
				
			||||||
 | 
					16, 32, 16, 256, 597.053950
 | 
				
			||||||
 | 
					32, 8, 32, 256, 557.382591
 | 
				
			||||||
 | 
					32, 16, 32, 256, 1100.145311
 | 
				
			||||||
 | 
					32, 32, 32, 256, 1885.080449
 | 
				
			||||||
 | 
					64, 8, 64, 256, 1725.163599
 | 
				
			||||||
 | 
					64, 16, 64, 256, 3389.336566
 | 
				
			||||||
 | 
					64, 32, 64, 256, 4168.252422
 | 
				
			||||||
 | 
					16, 8, 256, 256, 1326.262134
 | 
				
			||||||
 | 
					16, 16, 256, 256, 2318.095475
 | 
				
			||||||
 | 
					16, 32, 256, 256, 3555.436503
 | 
				
			||||||
 | 
					32, 8, 256, 256, 1920.139170
 | 
				
			||||||
 | 
					32, 16, 256, 256, 3486.174753
 | 
				
			||||||
 | 
					32, 32, 256, 256, 5320.821724
 | 
				
			||||||
 | 
					64, 8, 256, 256, 2539.597502
 | 
				
			||||||
 | 
					64, 16, 256, 256, 5003.456775
 | 
				
			||||||
 | 
					64, 32, 256, 256, 7837.531562
 | 
				
			||||||
 | 
					8, 256, 16, 256, 1427.848170
 | 
				
			||||||
 | 
					16, 256, 16, 256, 2222.147815
 | 
				
			||||||
 | 
					32, 256, 16, 256, 2877.121715
 | 
				
			||||||
 | 
					8, 256, 32, 256, 1922.890086
 | 
				
			||||||
 | 
					16, 256, 32, 256, 3199.469082
 | 
				
			||||||
 | 
					32, 256, 32, 256, 4845.405343
 | 
				
			||||||
 | 
					8, 256, 64, 256, 2639.483343
 | 
				
			||||||
 | 
					16, 256, 64, 256, 5012.800299
 | 
				
			||||||
 | 
					32, 256, 64, 256, 7216.006882
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Communications
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Packet bytes, direction, GB/s per node
 | 
				
			||||||
 | 
					4718592, 2, 206.570734
 | 
				
			||||||
 | 
					4718592, 3, 207.501847
 | 
				
			||||||
 | 
					4718592, 6, 189.730277
 | 
				
			||||||
 | 
					4718592, 7, 204.301218
 | 
				
			||||||
 | 
					15925248, 2, 307.882997
 | 
				
			||||||
 | 
					15925248, 3, 287.901076
 | 
				
			||||||
 | 
					15925248, 6, 295.603109
 | 
				
			||||||
 | 
					15925248, 7, 300.682033
 | 
				
			||||||
 | 
					37748736, 2, 331.740364
 | 
				
			||||||
 | 
					37748736, 3, 338.610627
 | 
				
			||||||
 | 
					37748736, 6, 332.580657
 | 
				
			||||||
 | 
					37748736, 7, 336.336579
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Per node summary table
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					L , Wilson, DWF4, Staggered, GF/s per node
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					8 , 16, 1165, 10
 | 
				
			||||||
 | 
					12 , 473, 4901, 163
 | 
				
			||||||
 | 
					16 , 1436, 8464, 442
 | 
				
			||||||
 | 
					24 , 4133, 10139, 1530
 | 
				
			||||||
 | 
					32 , 5726, 11487, 2518
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		
		
			
  | 
@@ -5,10 +5,12 @@ LIME=/p/home/jusers/boyle2/juwels/gm2dwf/boyle/
 | 
				
			|||||||
    --enable-gen-simd-width=64 \
 | 
					    --enable-gen-simd-width=64 \
 | 
				
			||||||
    --enable-shm=nvlink \
 | 
					    --enable-shm=nvlink \
 | 
				
			||||||
    --enable-accelerator=cuda \
 | 
					    --enable-accelerator=cuda \
 | 
				
			||||||
 | 
					    --disable-gparity \
 | 
				
			||||||
 | 
					    --disable-fermion-reps \
 | 
				
			||||||
    --with-lime=$LIME \
 | 
					    --with-lime=$LIME \
 | 
				
			||||||
    --disable-accelerator-cshift \
 | 
					    --enable-accelerator-cshift \
 | 
				
			||||||
    --disable-unified \
 | 
					    --disable-unified \
 | 
				
			||||||
    CXX=nvcc \
 | 
					    CXX=nvcc \
 | 
				
			||||||
    LDFLAGS="-cudart shared " \
 | 
					    LDFLAGS="-cudart shared " \
 | 
				
			||||||
    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
 | 
					    CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared -lcublas"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,5 @@
 | 
				
			|||||||
module load GCC/9.3.0       
 | 
					module load GCC
 | 
				
			||||||
module load  GMP/6.2.0   
 | 
					module load GMP
 | 
				
			||||||
module load MPFR/4.1.0     
 | 
					module load MPFR
 | 
				
			||||||
module load OpenMPI/4.1.0rc1  
 | 
					module load OpenMPI
 | 
				
			||||||
module load CUDA/11.3
 | 
					module load CUDA
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -16,7 +16,7 @@ CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
 | 
				
			|||||||
--disable-fermion-reps \
 | 
					--disable-fermion-reps \
 | 
				
			||||||
CXX=hipcc MPICXX=mpicxx \
 | 
					CXX=hipcc MPICXX=mpicxx \
 | 
				
			||||||
CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 -fgpu-sanitize" \
 | 
					CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 -fgpu-sanitize" \
 | 
				
			||||||
 LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 "
 | 
					 LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64  -lhipblas -lrocblas"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,3 +1,5 @@
 | 
				
			|||||||
export https_proxy=http://proxy-chain.intel.com:911
 | 
					export https_proxy=http://proxy-chain.intel.com:911
 | 
				
			||||||
module load intel-release
 | 
					module load intel-release
 | 
				
			||||||
module load intel/mpich
 | 
					module load intel/mpich
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
TOOLS=$HOME/tools
 | 
					
 | 
				
			||||||
../../configure \
 | 
					../../configure \
 | 
				
			||||||
	--enable-simd=GPU \
 | 
						--enable-simd=GPU \
 | 
				
			||||||
	--enable-gen-simd-width=64 \
 | 
						--enable-gen-simd-width=64 \
 | 
				
			||||||
@@ -11,6 +11,6 @@ TOOLS=$HOME/tools
 | 
				
			|||||||
	--enable-unified=no \
 | 
						--enable-unified=no \
 | 
				
			||||||
	MPICXX=mpicxx \
 | 
						MPICXX=mpicxx \
 | 
				
			||||||
	CXX=icpx \
 | 
						CXX=icpx \
 | 
				
			||||||
	LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$TOOLS/lib64/" \
 | 
						LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -lsycl" \
 | 
				
			||||||
	CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include"
 | 
						CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -qmkl=parallel"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										2
									
								
								systems/Sunspot/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								systems/Sunspot/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,2 @@
 | 
				
			|||||||
 | 
					module load oneapi/eng-compiler/2023.05.15.003
 | 
				
			||||||
 | 
					module load mpich/51.2/icc-all-deterministic-pmix-gpu
 | 
				
			||||||
							
								
								
									
										81
									
								
								systems/Sunspot/tests/repro1gpu.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								systems/Sunspot/tests/repro1gpu.pbs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,81 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#PBS -l select=16:system=sunspot,place=scatter
 | 
				
			||||||
 | 
					#PBS -A LatticeQCD_aesp_CNDA
 | 
				
			||||||
 | 
					#PBS -l walltime=02:00:00
 | 
				
			||||||
 | 
					#PBS -N repro1gpu
 | 
				
			||||||
 | 
					#PBS -k doe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export OMP_PROC_BIND=spread
 | 
				
			||||||
 | 
					#unset OMP_PLACES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					module load oneapi/eng-compiler/2023.05.15.003
 | 
				
			||||||
 | 
					module load mpich/51.2/icc-all-deterministic-pmix-gpu
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 56 cores / 6 threads ~9
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=6
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
				
			||||||
 | 
					export MPICH_OFI_NIC_POLICY=GPU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NN=`cat $PBS_NODEFILE | wc -l`
 | 
				
			||||||
 | 
					echo $PBS_NODEFILE
 | 
				
			||||||
 | 
					cat $PBS_NODEFILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $NN nodes in node file
 | 
				
			||||||
 | 
					for n in `eval echo {1..$NN}`
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 `
 | 
				
			||||||
 | 
					echo Node $n is $THIS_NODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for g in {0..11}
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					export NUMA_MAP=(0 0 0 1 1 1 0 0 0 1 1 1 )
 | 
				
			||||||
 | 
					export TILE_MAP=(0 0 0 0 0 0 1 1 1 1 1 1 )
 | 
				
			||||||
 | 
					export  GPU_MAP=(0 1 2 3 4 5 0 1 2 3 4 5 )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export numa=${NUMA_MAP[$g]}
 | 
				
			||||||
 | 
					export gpu_id=${GPU_MAP[$g]}
 | 
				
			||||||
 | 
					export tile_id=${TILE_MAP[$g]}
 | 
				
			||||||
 | 
					export gpu=$gpu_id.$tile_id
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DIR=repro.1gpu.$PBS_JOBID/node-$n-$THIS_NODE-GPU-$gpu
 | 
				
			||||||
 | 
					mkdir -p $DIR
 | 
				
			||||||
 | 
					cd $DIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $THIS_NODE > nodefile
 | 
				
			||||||
 | 
					echo $gpu > gpu
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export ZE_AFFINITY_MASK=$gpu
 | 
				
			||||||
 | 
					export ONEAPI_DEVICE_FILTER=gpu,level_zero
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CMD="mpiexec -np 1 -ppn 1  -envall --hostfile nodefile \
 | 
				
			||||||
 | 
						     numactl -N $numa -m $numa ../../Test_dwf_mixedcg_prec --mpi 1.1.1.1 --grid 16.16.32.32 \
 | 
				
			||||||
 | 
							--shm-mpi 0 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message"
 | 
				
			||||||
 | 
					echo $CMD
 | 
				
			||||||
 | 
					$CMD &
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					wait
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										97
									
								
								systems/Sunspot/tests/reproN.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								systems/Sunspot/tests/reproN.pbs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,97 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#PBS -l select=32:system=sunspot,place=scatter
 | 
				
			||||||
 | 
					#PBS -A LatticeQCD_aesp_CNDA
 | 
				
			||||||
 | 
					#PBS -l walltime=02:00:00
 | 
				
			||||||
 | 
					#PBS -N reproN
 | 
				
			||||||
 | 
					#PBS -k doe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#export OMP_PROC_BIND=spread
 | 
				
			||||||
 | 
					#unset OMP_PLACES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					module load oneapi/eng-compiler/2023.05.15.003
 | 
				
			||||||
 | 
					module load mpich/51.2/icc-all-deterministic-pmix-gpu
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# 56 cores / 6 threads ~9
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=6
 | 
				
			||||||
 | 
					export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
 | 
				
			||||||
 | 
					#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16
 | 
				
			||||||
 | 
					#export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=1
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export GRID_PRINT_ENTIRE_LOG=0
 | 
				
			||||||
 | 
					export GRID_CHECKSUM_RECV_BUF=1
 | 
				
			||||||
 | 
					export GRID_CHECKSUM_SEND_BUF=0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPICH_OFI_NIC_POLICY=GPU
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0
 | 
				
			||||||
 | 
					export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NN=`cat $PBS_NODEFILE | wc -l`
 | 
				
			||||||
 | 
					echo $PBS_NODEFILE
 | 
				
			||||||
 | 
					cat $PBS_NODEFILE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $NN nodes in node file
 | 
				
			||||||
 | 
					for n in `eval echo {1..$NN}`
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 `
 | 
				
			||||||
 | 
					echo Node $n is $THIS_NODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DIR=reproN.$PBS_JOBID/node-$n-$THIS_NODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mkdir -p $DIR
 | 
				
			||||||
 | 
					cd $DIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $THIS_NODE > nodefile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#CMD="mpiexec -np 12 -ppn 12  -envall --hostfile nodefile \
 | 
				
			||||||
 | 
					#	     ../../gpu_tile_compact.sh \
 | 
				
			||||||
 | 
					#	     ../../Test_dwf_mixedcg_prec --mpi 1.2.2.3 --grid 32.64.64.96 \
 | 
				
			||||||
 | 
					#		--shm-mpi 0 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message --comms-overlap"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CMD="mpiexec -np 12 -ppn 12  -envall --hostfile nodefile \
 | 
				
			||||||
 | 
						     ../../gpu_tile_compact.sh \
 | 
				
			||||||
 | 
						     ../../Test_dwf_mixedcg_prec --mpi 1.2.2.3 --grid 32.64.64.96 \
 | 
				
			||||||
 | 
							--shm-mpi 1 --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --debug-stdout --log Message --comms-overlap"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo $CMD > command-line
 | 
				
			||||||
 | 
					env > environment
 | 
				
			||||||
 | 
					$CMD &
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Suspicious wait is allowing jobs to collide and knock out
 | 
				
			||||||
 | 
					#wait
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					sleep 6500
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for n in ` eval echo {1..$NN} `
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					THIS_NODE=`head -n$n $PBS_NODEFILE | tail -n1 `
 | 
				
			||||||
 | 
					DIR=reproN.$PBS_JOBID/node-$n-$THIS_NODE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $DIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					grep Oops Grid.stderr.* > failures.$PBS_JOBID
 | 
				
			||||||
 | 
					rm core.*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
@@ -1,4 +1,3 @@
 | 
				
			|||||||
BREW=/opt/local/
 | 
					CXXFLAGS=-I/opt/local/include LDFLAGS=-L/opt/local/lib/ CXX=c++-13 MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug 
 | 
				
			||||||
MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,27 +30,60 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
using namespace std;
 | 
					using namespace std;
 | 
				
			||||||
using namespace Grid;
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class d>
 | 
					#ifndef HOST_NAME_MAX
 | 
				
			||||||
struct scal {
 | 
					#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
 | 
				
			||||||
  d internal;
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					template<class Matrix,class Field>
 | 
				
			||||||
 | 
					  class SchurDiagMooeeOperatorParanoid :  public SchurOperatorBase<Field> {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					    Matrix &_Mat;
 | 
				
			||||||
 | 
					    SchurDiagMooeeOperatorParanoid (Matrix &Mat): _Mat(Mat){};
 | 
				
			||||||
 | 
					    virtual  void Mpc      (const Field &in, Field &out) {
 | 
				
			||||||
 | 
					      Field tmp(in.Grid());
 | 
				
			||||||
 | 
					      tmp.Checkerboard() = !in.Checkerboard();
 | 
				
			||||||
 | 
					      //      std::cout <<" Mpc starting"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      RealD nn = norm2(in); // std::cout <<" Mpc Prior to dslash norm is "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      _Mat.Meooe(in,tmp);
 | 
				
			||||||
 | 
					      nn = norm2(tmp); //std::cout <<" Mpc Prior to Mooeinv "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      _Mat.MooeeInv(tmp,out);
 | 
				
			||||||
 | 
					      nn = norm2(out); //std::cout <<" Mpc Prior to dslash norm is "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      _Mat.Meooe(out,tmp);
 | 
				
			||||||
 | 
					      nn = norm2(tmp); //std::cout <<" Mpc Prior to Mooee "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      _Mat.Mooee(in,out);
 | 
				
			||||||
 | 
					      nn = norm2(out); //std::cout <<" Mpc Prior to axpy "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      axpy(out,-1.0,tmp,out);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    virtual void MpcDag   (const Field &in, Field &out){
 | 
				
			||||||
 | 
					      Field tmp(in.Grid());
 | 
				
			||||||
 | 
					      //      std::cout <<" MpcDag starting"<<std::endl;
 | 
				
			||||||
 | 
					      RealD nn = norm2(in);// std::cout <<" MpcDag Prior to dslash norm is "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      _Mat.MeooeDag(in,tmp);
 | 
				
			||||||
 | 
					      _Mat.MooeeInvDag(tmp,out);
 | 
				
			||||||
 | 
					      nn = norm2(out);// std::cout <<" MpcDag Prior to dslash norm is "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      _Mat.MeooeDag(out,tmp);
 | 
				
			||||||
 | 
					      nn = norm2(tmp);// std::cout <<" MpcDag Prior to Mooee "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      _Mat.MooeeDag(in,out);
 | 
				
			||||||
 | 
					      nn = norm2(out);// std::cout <<" MpcDag Prior to axpy "<<nn<<std::endl;
 | 
				
			||||||
 | 
					      axpy(out,-1.0,tmp,out);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Gamma::Algebra Gmu [] = {
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
    Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
    Gamma::Algebra::GammaT
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
int main (int argc, char ** argv)
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  char hostname[HOST_NAME_MAX+1];
 | 
				
			||||||
 | 
					  gethostname(hostname, HOST_NAME_MAX+1);
 | 
				
			||||||
 | 
					  std::string host(hostname);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  Grid_init(&argc,&argv);
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  const int Ls=12;
 | 
					  const int Ls=12;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  { 
 | 
					 | 
				
			||||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
 | 
					  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
 | 
				
			||||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
					  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
				
			||||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
					  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
				
			||||||
@@ -89,10 +122,17 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  result_o_2.Checkerboard() = Odd;
 | 
					  result_o_2.Checkerboard() = Odd;
 | 
				
			||||||
  result_o_2 = Zero();
 | 
					  result_o_2 = Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
 | 
					  SchurDiagMooeeOperatorParanoid<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
 | 
				
			||||||
  SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
 | 
					  SchurDiagMooeeOperatorParanoid<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int nsecs=600;
 | 
				
			||||||
 | 
					  if( GridCmdOptionExists(argv,argv+argc,"--seconds") ){
 | 
				
			||||||
 | 
					    std::string arg = GridCmdOptionPayload(argv,argv+argc,"--seconds");
 | 
				
			||||||
 | 
					    GridCmdOptionInt(arg,nsecs);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "::::::::::::: Starting mixed CG for "<<nsecs <<" seconds" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
 | 
					 | 
				
			||||||
  MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
 | 
					  MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
 | 
				
			||||||
  double t1,t2,flops;
 | 
					  double t1,t2,flops;
 | 
				
			||||||
  double MdagMsiteflops = 1452; // Mobius (real coeffs)
 | 
					  double MdagMsiteflops = 1452; // Mobius (real coeffs)
 | 
				
			||||||
@@ -101,7 +141,26 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
 | 
					  std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
 | 
				
			||||||
  std:: cout << " CG    site flops = "<< CGsiteflops <<std::endl;
 | 
					  std:: cout << " CG    site flops = "<< CGsiteflops <<std::endl;
 | 
				
			||||||
  int iters;
 | 
					  int iters;
 | 
				
			||||||
  for(int i=0;i<10;i++){
 | 
					
 | 
				
			||||||
 | 
					  time_t start = time(NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FlightRecorder::ContinueOnFail = 0;
 | 
				
			||||||
 | 
					  FlightRecorder::PrintEntireLog = 0;
 | 
				
			||||||
 | 
					  FlightRecorder::ChecksumComms  = 1;
 | 
				
			||||||
 | 
					  FlightRecorder::ChecksumCommsSend=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if(char *s=getenv("GRID_PRINT_ENTIRE_LOG"))  FlightRecorder::PrintEntireLog     = atoi(s);
 | 
				
			||||||
 | 
					  if(char *s=getenv("GRID_CHECKSUM_RECV_BUF")) FlightRecorder::ChecksumComms      = atoi(s);
 | 
				
			||||||
 | 
					  if(char *s=getenv("GRID_CHECKSUM_SEND_BUF")) FlightRecorder::ChecksumCommsSend  = atoi(s);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int iter=0;
 | 
				
			||||||
 | 
					  do {
 | 
				
			||||||
 | 
					    if ( iter == 0 ) {
 | 
				
			||||||
 | 
					      FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeRecord);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeVerify);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    std::cerr << "******************* SINGLE PRECISION SOLVE "<<iter<<std::endl;
 | 
				
			||||||
    result_o = Zero();
 | 
					    result_o = Zero();
 | 
				
			||||||
    t1=usecond();
 | 
					    t1=usecond();
 | 
				
			||||||
    mCG(src_o,result_o);
 | 
					    mCG(src_o,result_o);
 | 
				
			||||||
@@ -111,10 +170,24 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
    flops+= CGsiteflops*FrbGrid->gSites()*iters;
 | 
					    flops+= CGsiteflops*FrbGrid->gSites()*iters;
 | 
				
			||||||
    std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
 | 
					    std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
 | 
				
			||||||
    std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
 | 
					    std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
 | 
				
			||||||
  }
 | 
					    std::cout << " SinglePrecision error count "<< FlightRecorder::ErrorCount()<<std::endl;
 | 
				
			||||||
  std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
 | 
					
 | 
				
			||||||
 | 
					    assert(FlightRecorder::ErrorCount()==0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << " FlightRecorder is OK! "<<std::endl;
 | 
				
			||||||
 | 
					    iter ++;
 | 
				
			||||||
 | 
					  } while (time(NULL) < (start + nsecs/10) );
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "::::::::::::: Starting double precision CG" << std::endl;
 | 
				
			||||||
  ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
 | 
					  ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
 | 
				
			||||||
  for(int i=0;i<1;i++){
 | 
					  int i=0;
 | 
				
			||||||
 | 
					  do { 
 | 
				
			||||||
 | 
					    if ( i == 0 ) {
 | 
				
			||||||
 | 
					      FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeRecord);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeVerify);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    std::cerr << "******************* DOUBLE PRECISION SOLVE "<<i<<std::endl;
 | 
				
			||||||
    result_o_2 = Zero();
 | 
					    result_o_2 = Zero();
 | 
				
			||||||
    t1=usecond();
 | 
					    t1=usecond();
 | 
				
			||||||
    CG(HermOpEO,src_o,result_o_2);
 | 
					    CG(HermOpEO,src_o,result_o_2);
 | 
				
			||||||
@@ -125,43 +198,17 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
 | 
					    std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
 | 
				
			||||||
    std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
 | 
					    std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
 | 
				
			||||||
  }
 | 
					    std::cout << " DoublePrecision error count "<< FlightRecorder::ErrorCount()<<std::endl;
 | 
				
			||||||
  
 | 
					    assert(FlightRecorder::ErrorCount()==0);
 | 
				
			||||||
  //  MemoryManager::Print();
 | 
					    std::cout << " FlightRecorder is OK! "<<std::endl;
 | 
				
			||||||
 | 
					    i++;
 | 
				
			||||||
 | 
					  } while (time(NULL) < (start + nsecs) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LatticeFermionD diff_o(FrbGrid);
 | 
					  LatticeFermionD diff_o(FrbGrid);
 | 
				
			||||||
  RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
 | 
					  RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
 | 
					  std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
 | 
				
			||||||
 | 
					  assert(diff < 1e-4);
 | 
				
			||||||
  #ifdef HAVE_LIME
 | 
					 | 
				
			||||||
  if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  std::string file1("./Propagator1");
 | 
					 | 
				
			||||||
  emptyUserRecord record;
 | 
					 | 
				
			||||||
  uint32_t nersc_csum;
 | 
					 | 
				
			||||||
  uint32_t scidac_csuma;
 | 
					 | 
				
			||||||
  uint32_t scidac_csumb;
 | 
					 | 
				
			||||||
  typedef SpinColourVectorD   FermionD;
 | 
					 | 
				
			||||||
  typedef vSpinColourVectorD vFermionD;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  BinarySimpleMunger<FermionD,FermionD> munge;
 | 
					 | 
				
			||||||
  std::string format = getFormatString<vFermionD>();
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o,file1,munge, 0, format,
 | 
					 | 
				
			||||||
						   nersc_csum,scidac_csuma,scidac_csumb);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  std::cout << GridLogMessage << " Mixed checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  BinaryIO::writeLatticeObject<vFermionD,FermionD>(result_o_2,file1,munge, 0, format,
 | 
					 | 
				
			||||||
						   nersc_csum,scidac_csuma,scidac_csumb);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  std::cout << GridLogMessage << " CG checksums "<<std::hex << scidac_csuma << " "<<scidac_csumb<<std::endl;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  #endif
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  MemoryManager::Print();
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  Grid_finalize();
 | 
					  Grid_finalize();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										321
									
								
								tests/core/Test_sliceSum.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										321
									
								
								tests/core/Test_sliceSum.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,321 @@
 | 
				
			|||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj> inline void sliceSumCPU(const Grid::Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  using namespace Grid;
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // FIXME precision promoted summation
 | 
				
			||||||
 | 
					  // may be important for correlation functions
 | 
				
			||||||
 | 
					  // But easily avoided by using double precision fields
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  typedef typename vobj::scalar_object sobj;
 | 
				
			||||||
 | 
					  typedef typename vobj::scalar_object::scalar_type scalar_type;
 | 
				
			||||||
 | 
					  GridBase  *grid = Data.Grid();
 | 
				
			||||||
 | 
					  assert(grid!=NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int    Nd = grid->_ndimension;
 | 
				
			||||||
 | 
					  const int Nsimd = grid->Nsimd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(orthogdim >= 0);
 | 
				
			||||||
 | 
					  assert(orthogdim < Nd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int fd=grid->_fdimensions[orthogdim];
 | 
				
			||||||
 | 
					  int ld=grid->_ldimensions[orthogdim];
 | 
				
			||||||
 | 
					  int rd=grid->_rdimensions[orthogdim];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Vector<vobj> lvSum(rd); // will locally sum vectors first
 | 
				
			||||||
 | 
					  Vector<sobj> lsSum(ld,Zero());                    // sum across these down to scalars
 | 
				
			||||||
 | 
					  ExtractBuffer<sobj> extracted(Nsimd);                  // splitting the SIMD
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  result.resize(fd); // And then global sum to return the same vector to every node 
 | 
				
			||||||
 | 
					  for(int r=0;r<rd;r++){
 | 
				
			||||||
 | 
					    lvSum[r]=Zero();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int e1=    grid->_slice_nblock[orthogdim];
 | 
				
			||||||
 | 
					  int e2=    grid->_slice_block [orthogdim];
 | 
				
			||||||
 | 
					  int stride=grid->_slice_stride[orthogdim];
 | 
				
			||||||
 | 
					  int ostride=grid->_ostride[orthogdim];
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //Reduce Data down to lvSum
 | 
				
			||||||
 | 
					  sliceSumReduction_cpu(Data,lvSum,rd, e1,e2,stride,ostride,Nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Sum across simd lanes in the plane, breaking out orthog dir.
 | 
				
			||||||
 | 
					  Coordinate icoor(Nd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int rt=0;rt<rd;rt++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    extract(lvSum[rt],extracted);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int idx=0;idx<Nsimd;idx++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      grid->iCoorFromIindex(icoor,idx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      int ldx =rt+icoor[orthogdim]*rd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      lsSum[ldx]=lsSum[ldx]+extracted[idx];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // sum over nodes.
 | 
				
			||||||
 | 
					  for(int t=0;t<fd;t++){
 | 
				
			||||||
 | 
					    int pt = t/ld; // processor plane
 | 
				
			||||||
 | 
					    int lt = t%ld;
 | 
				
			||||||
 | 
					    if ( pt == grid->_processor_coor[orthogdim] ) {
 | 
				
			||||||
 | 
					      result[t]=lsSum[lt];
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      result[t]=Zero();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  scalar_type * ptr = (scalar_type *) &result[0];
 | 
				
			||||||
 | 
					  int words = fd*sizeof(sobj)/sizeof(scalar_type);
 | 
				
			||||||
 | 
					  grid->GlobalSumVector(ptr, words);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char ** argv) {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Coordinate latt_size({64,64,64,16});
 | 
				
			||||||
 | 
					    auto simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
 | 
				
			||||||
 | 
					    auto mpi_layout = GridDefaultMpi();
 | 
				
			||||||
 | 
					    GridCartesian Grid(latt_size, simd_layout, mpi_layout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<int> seeds({1, 2, 3, 4});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GridParallelRNG pRNG(&Grid);
 | 
				
			||||||
 | 
					    pRNG.SeedFixedIntegers(seeds);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LatticeComplexD test_data(&Grid);
 | 
				
			||||||
 | 
					    gaussian(pRNG,test_data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<TComplexD> reduction_reference;
 | 
				
			||||||
 | 
					    std::vector<TComplexD> reduction_result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //warmup
 | 
				
			||||||
 | 
					    for (int sweeps = 0; sweeps < 5; sweeps++) {
 | 
				
			||||||
 | 
					      reduction_result = sliceSum(test_data,0);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int trace_id = traceStart("sliceSum benchmark - ComplexD");
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Testing ComplexD" << std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "sizeof(ComplexD) = " << sizeof(ComplexD) << std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "sizeof(vComplexD) = " << sizeof(vComplexD) << std::endl;
 | 
				
			||||||
 | 
					    for (int i = 0; i < Nd; i++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      RealD t=-usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tracePush("sliceSum");
 | 
				
			||||||
 | 
					      sliceSumCPU(test_data,reduction_reference,i);
 | 
				
			||||||
 | 
					      tracePop("sliceSum");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      t+=usecond();
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "Orthog. dir. = " << i << std::endl;
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "CPU sliceSum took "<<t<<" usecs"<<std::endl;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      RealD tgpu=-usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tracePush("sliceSumGpu");
 | 
				
			||||||
 | 
					      reduction_result = sliceSum(test_data,i);
 | 
				
			||||||
 | 
					      tracePop("sliceSumGpu");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tgpu+=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage <<"GPU sliceSum took "<<tgpu<<" usecs"<<std::endl<<std::endl;;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int t=0;t<reduction_reference.size();t++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        auto diff = reduction_reference[t]-reduction_result[t];
 | 
				
			||||||
 | 
					        assert(abs(TensorRemove(diff)) < 1e-8 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    traceStop(trace_id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LatticeSpinVectorD test_data_cv(&Grid);
 | 
				
			||||||
 | 
					    gaussian(pRNG,test_data_cv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<SpinVectorD> reduction_reference_cv;
 | 
				
			||||||
 | 
					    std::vector<SpinVectorD> reduction_result_cv;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //warmup
 | 
				
			||||||
 | 
					    for (int sweeps = 0; sweeps < 5; sweeps++) {
 | 
				
			||||||
 | 
					      reduction_result_cv = sliceSum(test_data_cv,0);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    trace_id = traceStart("sliceSum benchmark - SpinVectorD");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Testing SpinVectorD" << std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "sizeof(SpinVectorD) = " << sizeof(SpinVectorD) << std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "sizeof(vSpinVectorD) = " << sizeof(vSpinVectorD) << std::endl;
 | 
				
			||||||
 | 
					    for (int i = 0; i < Nd; i++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      RealD t=-usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tracePush("sliceSum");
 | 
				
			||||||
 | 
					      sliceSumCPU(test_data_cv,reduction_reference_cv,i);
 | 
				
			||||||
 | 
					      tracePop("sliceSum");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      t+=usecond();
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "Orthog. dir. = " << i << std::endl;
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "CPU sliceSum took "<<t<<" usecs"<<std::endl;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      RealD tgpu=-usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tracePush("sliceSumGpu");
 | 
				
			||||||
 | 
					      reduction_result_cv = sliceSum(test_data_cv,i);
 | 
				
			||||||
 | 
					      tracePop("sliceSumGpu");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tgpu+=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage <<"GPU sliceSum took "<<tgpu<<" usecs"<<std::endl<<std::endl;;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int t=0;t<reduction_reference_cv.size();t++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        auto diff = reduction_reference_cv[t]-reduction_result_cv[t];
 | 
				
			||||||
 | 
					        assert(abs(diff()(0)()) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(1)()) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(2)()) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(3)()) < 1e-8 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    traceStop(trace_id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LatticeSpinColourVectorD test_data_scv(&Grid);
 | 
				
			||||||
 | 
					    gaussian(pRNG,test_data_scv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<SpinColourVectorD> reduction_reference_scv;
 | 
				
			||||||
 | 
					    std::vector<SpinColourVectorD> reduction_result_scv;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //warmup
 | 
				
			||||||
 | 
					    for (int sweeps = 0; sweeps < 5; sweeps++) {
 | 
				
			||||||
 | 
					      reduction_result_scv = sliceSum(test_data_scv,0);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    trace_id = traceStart("sliceSum benchmark - SpinColourVectorD");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Testing SpinColourVectorD" << std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "sizeof(SpinColourVectorD) = " << sizeof(SpinColourVectorD) << std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "sizeof(vSpinColourVectorD) = " << sizeof(vSpinColourVectorD) << std::endl;
 | 
				
			||||||
 | 
					    for (int i = 0; i < Nd; i++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      RealD t=-usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tracePush("sliceSum");
 | 
				
			||||||
 | 
					      sliceSumCPU(test_data_scv,reduction_reference_scv,i);
 | 
				
			||||||
 | 
					      tracePop("sliceSum");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      t+=usecond();
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "Orthog. dir. = " << i << std::endl;
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "CPU sliceSum took "<<t<<" usecs"<<std::endl;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      RealD tgpu=-usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tracePush("sliceSumGpu");
 | 
				
			||||||
 | 
					      reduction_result_scv = sliceSum(test_data_scv,i);
 | 
				
			||||||
 | 
					      tracePop("sliceSumGpu");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tgpu+=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage <<"GPU sliceSum took "<<tgpu<<" usecs"<<std::endl<<std::endl;;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int t=0;t<reduction_reference_scv.size();t++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        auto diff = reduction_reference_scv[t]-reduction_result_scv[t];
 | 
				
			||||||
 | 
					        // std::cout << diff <<std::endl;
 | 
				
			||||||
 | 
					        assert(abs(diff()(0)(0)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(0)(1)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(0)(2)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(1)(0)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(1)(1)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(1)(2)) < 1e-8 );    
 | 
				
			||||||
 | 
					        assert(abs(diff()(2)(0)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(2)(1)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(2)(2)) < 1e-8 );    
 | 
				
			||||||
 | 
					        assert(abs(diff()(3)(0)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(3)(1)) < 1e-8 );
 | 
				
			||||||
 | 
					        assert(abs(diff()(3)(2)) < 1e-8 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    traceStop(trace_id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LatticeSpinColourMatrixD test_data_scm(&Grid);
 | 
				
			||||||
 | 
					    gaussian(pRNG,test_data_scm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<SpinColourMatrixD> reduction_reference_scm;
 | 
				
			||||||
 | 
					    std::vector<SpinColourMatrixD> reduction_result_scm;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //warmup
 | 
				
			||||||
 | 
					    for (int sweeps = 0; sweeps < 5; sweeps++) {
 | 
				
			||||||
 | 
					      reduction_result_scm = sliceSum(test_data_scm,0);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    trace_id = traceStart("sliceSum benchmark - SpinColourMatrixD");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Testing SpinColourMatrixD" << std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "sizeof(SpinColourMatrixD) = " << sizeof(SpinColourMatrixD) << std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "sizeof(vSpinColourMatrixD) = " << sizeof(vSpinColourMatrixD) << std::endl;
 | 
				
			||||||
 | 
					    for (int i = 0; i < Nd; i++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      RealD t=-usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tracePush("sliceSum");
 | 
				
			||||||
 | 
					      sliceSumCPU(test_data_scm,reduction_reference_scm,i);
 | 
				
			||||||
 | 
					      tracePop("sliceSum");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      t+=usecond();
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "Orthog. dir. = " << i << std::endl;
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "CPU sliceSum took "<<t<<" usecs"<<std::endl;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      RealD tgpu=-usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tracePush("sliceSumGpu");
 | 
				
			||||||
 | 
					      reduction_result_scm = sliceSum(test_data_scm,i);
 | 
				
			||||||
 | 
					      tracePop("sliceSumGpu");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tgpu+=usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage <<"GPU sliceSum took "<<tgpu<<" usecs"<<std::endl<<std::endl;;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int t=0;t<reduction_reference_scm.size();t++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        auto diff = reduction_reference_scm[t]-reduction_result_scm[t];
 | 
				
			||||||
 | 
					        // std::cout << diff <<std::endl;
 | 
				
			||||||
 | 
					        for (int is = 0; is < Ns; is++) {
 | 
				
			||||||
 | 
					          for (int js = 0; js < Ns; js++) {
 | 
				
			||||||
 | 
					            for (int ic = 0; ic < Nc; ic++) {
 | 
				
			||||||
 | 
					              for (int jc = 0; jc < Nc; jc++) {
 | 
				
			||||||
 | 
					                assert(abs(diff()(is,js)(ic,jc)) < 1e-8);
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    traceStop(trace_id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid_finalize();
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -32,6 +32,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
using namespace std;
 | 
					using namespace std;
 | 
				
			||||||
using namespace Grid;
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This is to optimize the SIMD
 | 
				
			||||||
template<class vobj> void gpermute(vobj & inout,int perm){
 | 
					template<class vobj> void gpermute(vobj & inout,int perm){
 | 
				
			||||||
  vobj tmp=inout;
 | 
					  vobj tmp=inout;
 | 
				
			||||||
  if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;}
 | 
					  if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;}
 | 
				
			||||||
@@ -40,6 +41,7 @@ template<class vobj> void gpermute(vobj & inout,int perm){
 | 
				
			|||||||
  if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;}
 | 
					  if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int main (int argc, char ** argv)
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  Grid_init(&argc,&argv);
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
@@ -47,20 +49,21 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  Coordinate latt_size  = GridDefaultLatt();
 | 
					  Coordinate latt_size  = GridDefaultLatt();
 | 
				
			||||||
  Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
 | 
					  Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
 | 
				
			||||||
  Coordinate mpi_layout = GridDefaultMpi();
 | 
					  Coordinate mpi_layout = GridDefaultMpi();
 | 
				
			||||||
  std::cout << " mpi "<<mpi_layout<<std::endl;
 | 
					  std::cout << GridLogMessage << " mpi "<<mpi_layout<<std::endl;
 | 
				
			||||||
  std::cout << " simd "<<simd_layout<<std::endl;
 | 
					  std::cout << GridLogMessage << " simd "<<simd_layout<<std::endl;
 | 
				
			||||||
  std::cout << " latt "<<latt_size<<std::endl;
 | 
					  std::cout << GridLogMessage << " latt "<<latt_size<<std::endl;
 | 
				
			||||||
  GridCartesian GRID(latt_size,simd_layout,mpi_layout);
 | 
					  GridCartesian GRID(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Initialize configuration as hot start.
 | 
				
			||||||
  GridParallelRNG   pRNG(&GRID);
 | 
					  GridParallelRNG   pRNG(&GRID);
 | 
				
			||||||
  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
 | 
					 | 
				
			||||||
  LatticeGaugeField Umu(&GRID);
 | 
					  LatticeGaugeField Umu(&GRID);
 | 
				
			||||||
 | 
					  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
 | 
				
			||||||
  SU<Nc>::HotConfiguration(pRNG,Umu);
 | 
					  SU<Nc>::HotConfiguration(pRNG,Umu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 | 
					  Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 | 
				
			||||||
  LatticeComplex trplaq(&GRID);
 | 
					  LatticeComplex trplaq(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Store Umu in U. Peek/Poke mean respectively getElement/setElement.
 | 
				
			||||||
  std::vector<LatticeColourMatrix> U(Nd, Umu.Grid());
 | 
					  std::vector<LatticeColourMatrix> U(Nd, Umu.Grid());
 | 
				
			||||||
  for (int mu = 0; mu < Nd; mu++) {
 | 
					  for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
					    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
				
			||||||
@@ -70,9 +73,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  LatticeComplex cplaq(&GRID); cplaq=Zero();
 | 
					  LatticeComplex cplaq(&GRID); cplaq=Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  // Create a padded cell of extra padding depth=1
 | 
					  // Create a padded cell of extra padding depth=1
 | 
				
			||||||
  /////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  int depth = 1;
 | 
					  int depth = 1;
 | 
				
			||||||
  PaddedCell Ghost(depth,&GRID);
 | 
					  PaddedCell Ghost(depth,&GRID);
 | 
				
			||||||
  LatticeGaugeField Ughost = Ghost.Exchange(Umu);
 | 
					  LatticeGaugeField Ughost = Ghost.Exchange(Umu);
 | 
				
			||||||
@@ -114,18 +115,25 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ///// Array for the site plaquette
 | 
					  // Array for the site plaquette
 | 
				
			||||||
  GridBase *GhostGrid = Ughost.Grid();
 | 
					  GridBase *GhostGrid = Ughost.Grid();
 | 
				
			||||||
  LatticeComplex gplaq(GhostGrid); 
 | 
					  LatticeComplex gplaq(GhostGrid); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Now we're going to put together the "stencil" that will be useful to us when
 | 
				
			||||||
 | 
					  // calculating the plaquette. Our eventual goal is to make the product
 | 
				
			||||||
 | 
					  //    Umu(x) Unu(x+mu) Umu^dag(x+nu) Unu^dag(x),
 | 
				
			||||||
 | 
					  // which requires, in order, the sites x, x+mu, x+nu, and x. We arrive at these
 | 
				
			||||||
 | 
					  // sites relative to x through "shifts", which is represented here by a 4-d
 | 
				
			||||||
 | 
					  // vector of 0s (no movement) and 1s (shift one unit) at each site. The
 | 
				
			||||||
 | 
					  // "stencil" is the set of all these shifts.
 | 
				
			||||||
  std::vector<Coordinate> shifts;
 | 
					  std::vector<Coordinate> shifts;
 | 
				
			||||||
  for(int mu=0;mu<Nd;mu++){
 | 
					  for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
    for(int nu=mu+1;nu<Nd;nu++){
 | 
					    for(int nu=mu+1;nu<Nd;nu++){
 | 
				
			||||||
  
 | 
					 | 
				
			||||||
      //    Umu(x) Unu(x+mu) Umu^dag(x+nu) Unu^dag(x)
 | 
					 | 
				
			||||||
      Coordinate shift_0(Nd,0);
 | 
					      Coordinate shift_0(Nd,0);
 | 
				
			||||||
      Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
 | 
					      Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
 | 
				
			||||||
      Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
 | 
					      Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
 | 
				
			||||||
 | 
					      // push_back creates an element at the end of shifts and
 | 
				
			||||||
 | 
					      // assigns the data in the argument to it.
 | 
				
			||||||
      shifts.push_back(shift_0);
 | 
					      shifts.push_back(shift_0);
 | 
				
			||||||
      shifts.push_back(shift_mu);
 | 
					      shifts.push_back(shift_mu);
 | 
				
			||||||
      shifts.push_back(shift_nu);
 | 
					      shifts.push_back(shift_nu);
 | 
				
			||||||
@@ -135,10 +143,15 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  GeneralLocalStencil gStencil(GhostGrid,shifts);
 | 
					  GeneralLocalStencil gStencil(GhostGrid,shifts);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  gplaq=Zero();
 | 
					  gplaq=Zero();
 | 
				
			||||||
  {
 | 
					
 | 
				
			||||||
 | 
					  // Before doing accelerator stuff, there is an opening and closing of "Views". I guess the
 | 
				
			||||||
 | 
					  // "Views" are stored in *_v variables listed below.
 | 
				
			||||||
  autoView( gp_v , gplaq, CpuWrite);
 | 
					  autoView( gp_v , gplaq, CpuWrite);
 | 
				
			||||||
  autoView( t_v , trplaq, CpuRead);
 | 
					  autoView( t_v , trplaq, CpuRead);
 | 
				
			||||||
  autoView( U_v , Ughost, CpuRead);
 | 
					  autoView( U_v , Ughost, CpuRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // This is now a loop over stencil shift elements. That is, s increases as we make our
 | 
				
			||||||
 | 
					  // way through the spacetimes sites, but also as we make our way around the plaquette.
 | 
				
			||||||
  for(int ss=0;ss<gp_v.size();ss++){
 | 
					  for(int ss=0;ss<gp_v.size();ss++){
 | 
				
			||||||
    int s=0;
 | 
					    int s=0;
 | 
				
			||||||
    for(int mu=0;mu<Nd;mu++){
 | 
					    for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
@@ -149,6 +162,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
    	  auto SE2 = gStencil.GetEntry(s+2,ss);
 | 
					    	  auto SE2 = gStencil.GetEntry(s+2,ss);
 | 
				
			||||||
    	  auto SE3 = gStencil.GetEntry(s+3,ss);
 | 
					    	  auto SE3 = gStencil.GetEntry(s+3,ss);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Due to our strategy, each offset corresponds to a site.
 | 
				
			||||||
    	  int o0 = SE0->_offset;
 | 
					    	  int o0 = SE0->_offset;
 | 
				
			||||||
    	  int o1 = SE1->_offset;
 | 
					    	  int o1 = SE1->_offset;
 | 
				
			||||||
    	  int o2 = SE2->_offset;
 | 
					    	  int o2 = SE2->_offset;
 | 
				
			||||||
@@ -169,7 +183,11 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
    	}
 | 
					    	}
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  }
 | 
					
 | 
				
			||||||
 | 
					  // Here is my understanding of this part: The padded cell has its own periodic BCs, so
 | 
				
			||||||
 | 
					  // if I take a step to the right at the right-most side of the cell, I end up on the
 | 
				
			||||||
 | 
					  // left-most side. This means that the plaquettes in the padding are wrong. Luckily
 | 
				
			||||||
 | 
					  // all we care about are the plaquettes in the cell, which we obtain from Extract.
 | 
				
			||||||
  cplaq = Ghost.Extract(gplaq);
 | 
					  cplaq = Ghost.Extract(gplaq);
 | 
				
			||||||
  RealD vol = cplaq.Grid()->gSites();
 | 
					  RealD vol = cplaq.Grid()->gSites();
 | 
				
			||||||
  RealD faces = (Nd * (Nd-1))/2;
 | 
					  RealD faces = (Nd * (Nd-1))/2;
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										181
									
								
								tests/smearing/Test_fatLinks.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										181
									
								
								tests/smearing/Test_fatLinks.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,181 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./tests/smearing/Test_fatLinks.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2023
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: D. A. Clarke <clarke.davida@gmail.com> 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					    @file Test_fatLinks.cc
 | 
				
			||||||
 | 
					    @brief test of the HISQ smearing 
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					#include <Grid/lattice/PaddedCell.h>
 | 
				
			||||||
 | 
					#include <Grid/stencil/GeneralLocalStencil.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/smearing/HISQSmearing.h>
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!  @brief parameter file to easily adjust Nloop */
 | 
				
			||||||
 | 
					struct ConfParameters: Serializable {
 | 
				
			||||||
 | 
					    GRID_SERIALIZABLE_CLASS_MEMBERS(
 | 
				
			||||||
 | 
					        ConfParameters,
 | 
				
			||||||
 | 
					        int, benchmark, 
 | 
				
			||||||
 | 
					        int, Nloop);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    template <class ReaderClass>
 | 
				
			||||||
 | 
					    ConfParameters(Reader<ReaderClass>& Reader){
 | 
				
			||||||
 | 
					        read(Reader, "parameters", *this);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!
					 * @brief Smear Umu with the given HISQ coefficients and compare against Ucontrol.
					 *
					 * If cnaik < 1e-30 the first smear() output (Usmr) is compared, otherwise the second
					 * one (Unaik). The tested quantity is norm2(Ucontrol - U)/norm2(Ucontrol) and the
					 * comparison passes when it is below 1e-30. The outcome is also logged through
					 * Grid_pass / Grid_error.
					 *
					 * @return true when the comparison passed, false otherwise.
					 */
					bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik,
					               LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) {
					    Smear_HISQ<PeriodicGimplD> hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp);
					    LatticeGaugeFieldD diff(&GRID), Uproj(&GRID);
					    hisq_fat.smear(Usmr, Unaik, Umu);

					    // Select which smear() output is under test, together with its log label.
					    const bool naikTest = !(cnaik < 1e-30);
					    LatticeGaugeFieldD& Utest = naikTest ? Unaik : Usmr;
					    const char* label = naikTest ? " |Umu-Unaik|/|Umu| = " : " |Umu-Usmr|/|Umu| = ";

					    diff = Ucontrol-Utest;
					    auto relDiff = norm2(diff)/norm2(Ucontrol);
					    const bool ok = (relDiff < 1e-30);
					    if (ok) {
					        Grid_pass(label,relDiff);
					    } else {
					        Grid_error(label,relDiff);
					    }

					    // As before, the projection is only invoked in the Naik case; its result is not inspected.
					    if (naikTest) {
					        hisq_fat.projectU3(Uproj,Ucontrol);
					    }
					    return ok;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char** argv) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Params for the test.
 | 
				
			||||||
 | 
					    int Ns = 8;
 | 
				
			||||||
 | 
					    int Nt = 4;
 | 
				
			||||||
 | 
					    Coordinate latt_size(Nd,0); latt_size[0]=Ns; latt_size[1]=Ns; latt_size[2]=Ns; latt_size[3]=Nt;
 | 
				
			||||||
 | 
					    std::string conf_in  = "nersc.l8t4b3360";
 | 
				
			||||||
 | 
					    int threads          = GridThread::GetThreads();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    typedef LatticeGaugeFieldD LGF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Initialize the Grid
 | 
				
			||||||
 | 
					    Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					    Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
 | 
				
			||||||
 | 
					    Coordinate mpi_layout  = GridDefaultMpi();
 | 
				
			||||||
 | 
					    Grid_log("mpi     = ",mpi_layout);
 | 
				
			||||||
 | 
					    Grid_log("simd    = ",simd_layout);
 | 
				
			||||||
 | 
					    Grid_log("latt    = ",latt_size);
 | 
				
			||||||
 | 
					    Grid_log("threads = ",threads);
 | 
				
			||||||
 | 
					    GridCartesian GRID(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    XmlReader Reader("fatParams.xml",false,"grid");
 | 
				
			||||||
 | 
					    ConfParameters param(Reader);
 | 
				
			||||||
 | 
					    if(param.benchmark) Grid_log("  Nloop = ",param.Nloop);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LGF Umu(&GRID), Usmr(&GRID), Unaik(&GRID), Ucontrol(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Read the configuration into Umu
 | 
				
			||||||
 | 
					    FieldMetaData header;
 | 
				
			||||||
 | 
					    NerscIO::readConfiguration(Umu, header, conf_in);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    bool pass=true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Carry out various tests    
 | 
				
			||||||
 | 
					    NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357lplink.control");
 | 
				
			||||||
 | 
					    pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,-1/8.);
 | 
				
			||||||
 | 
					    NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357link.control");
 | 
				
			||||||
 | 
					    pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,0.);
 | 
				
			||||||
 | 
					    NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.35link.control");
 | 
				
			||||||
 | 
					    pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,0.,0.);
 | 
				
			||||||
 | 
					    NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.3link.control");
 | 
				
			||||||
 | 
					    pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,0.,0.,0.);
 | 
				
			||||||
 | 
					    NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.naik.control");
 | 
				
			||||||
 | 
					    pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,0.,0.8675309,0.,0.,0.,0.);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(pass){
 | 
				
			||||||
 | 
					        Grid_pass("All tests passed.");
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        Grid_error("At least one test failed.");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Test a C-style instantiation 
 | 
				
			||||||
 | 
					    double path_coeff[6] = {1, 2, 3, 4, 5, 6};
 | 
				
			||||||
 | 
					    Smear_HISQ<PeriodicGimplD> hisq_fat_Cstyle(&GRID,path_coeff);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (param.benchmark) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        autoView(U_v, Umu, CpuRead); // Gauge accessor
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Read in lattice sequentially, Nloop times 
 | 
				
			||||||
 | 
					        double lookupTime = 0.; 
 | 
				
			||||||
 | 
					        for(int i=0;i<param.Nloop;i++) {
 | 
				
			||||||
 | 
					            double start = usecond();
 | 
				
			||||||
 | 
					            for(int ss=0;ss<U_v.size();ss++)
 | 
				
			||||||
 | 
					                for(int mu=0;mu<Nd;mu++) {
 | 
				
			||||||
 | 
					                    auto U1 = U_v[ss](mu);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            double stop  = usecond();
 | 
				
			||||||
 | 
					        	lookupTime += stop-start; // microseconds
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        Grid_log("Time to lookup: ",lookupTime,"[ms]");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Raise a matrix to the power nmat, for each link. 
 | 
				
			||||||
 | 
					        auto U1 = U_v[0](0);
 | 
				
			||||||
 | 
					        for(int nmat=1;nmat<8;nmat++) {
 | 
				
			||||||
 | 
					            double multTime = 0.; 
 | 
				
			||||||
 | 
					            for(int i=0;i<param.Nloop;i++) {
 | 
				
			||||||
 | 
					                double start=usecond();
 | 
				
			||||||
 | 
					                for(int ss=0;ss<U_v.size();ss++)
 | 
				
			||||||
 | 
					                    for(int mu=0;mu<Nd;mu++) {
 | 
				
			||||||
 | 
					                        auto U2 = U1;
 | 
				
			||||||
 | 
					                        for(int j=1;j<nmat;j++) {
 | 
				
			||||||
 | 
					                            U2 *= U1;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                double stop=usecond();
 | 
				
			||||||
 | 
					                multTime += stop-start;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Grid_log("Time to multiply ",nmat," matrices: ",multTime," [ms]");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user