mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-26 09:39:34 +00:00 
			
		
		
		
	Compare commits
	
		
			127 Commits
		
	
	
		
			3c67d626ba
			...
			gauge-grou
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| a976fa6746 | |||
| 6c66b8d997 | |||
| 9523ad3d73 | |||
| 73a95fa96f | |||
|  | 67e08aa952 | ||
|  | ed1f20f3a1 | ||
|  | cffc736bb3 | ||
|  | c0d56a1c04 | ||
|  | 3206f69478 | ||
|  | b2ccaad761 | ||
|  | 8eb1232683 | ||
|  | c6ce3ad03b | ||
|  | b3b033d343 | ||
|  | ca9816bfbb | ||
|  | 814d5abc7e | ||
|  | a29122e2bf | ||
|  | e188c0512e | ||
|  | 1fb6aaf150 | ||
|  | 894654f7ef | ||
|  | 109507888b | ||
|  | 68650b61fe | ||
|  | 7ee66bf453 | ||
|  | 8bd70ad8b5 | ||
|  | af98525766 | ||
|  | 1c2f218519 | ||
|  | c9aa1f507c | ||
|  | ea7126496d | ||
|  | f660dc67e4 | ||
|  | ede8faea74 | ||
|  | 1b750761c2 | ||
|  | 145acf2919 | ||
|  | cc4a27b9e6 | ||
|  | b4690e6091 | ||
|  | 4b24800132 | ||
|  | 9d2238148c | ||
|  | c15493218d | ||
|  | 001a556a34 | ||
|  | 3d0f88e702 | ||
|  | dd091d0960 | ||
|  | e2abbf9520 | ||
|  | c7baeb5bae | ||
|  | 402d80e197 | ||
|  | 86e33c8ab2 | ||
|  | 5dae6a6dac | ||
|  | 361bb8a101 | ||
|  | 7efdb3cd2b | ||
|  | 65ef4ec29f | ||
|  | d5835c0222 | ||
|  | a7b943b33e | ||
|  | 7440cde92f | ||
|  | 0fc662bb24 | ||
|  | 8195890640 | ||
|  | 4c88104a73 | ||
|  | 73b944c152 | ||
|  | d1b0b7f5c6 | ||
|  | 381d8797d0 | ||
|  | b06526bc1e | ||
|  | 3044419111 | ||
|  | bcfa9cf068 | ||
|  | 114920b8de | ||
|  | 0d588b95f4 | ||
|  | 5b3c530aa7 | ||
|  | c6a5499c8b | ||
|  | ec9c3fe77a | ||
|  | 6135ad530e | ||
|  | 40098424c7 | ||
|  | 7163b31a26 | ||
|  | ffbdd91e0e | ||
|  | 5d29e175d8 | ||
|  | 417dbfa257 | ||
|  | 1eda4d8e0b | ||
|  | 50181f16e5 | ||
|  | 75030637cc | ||
|  | fe5aaf7677 | ||
|  | 80ac2a73ca | ||
| d75a66a3e6 | |||
| fcc4374d7b | |||
| 67c3c16fe5 | |||
| 25e9be50b5 | |||
| 323cf6c038 | |||
|  | 29a22ae603 | ||
|  | 403bff1a47 | ||
|  | c50f27e68b | ||
|  | 80afacec5b | ||
|  | 6cd9224dd7 | ||
|  | 4bf8196ff1 | ||
|  | 4c5440fb06 | ||
| a269a3d919 | |||
|  | 0c4f585496 | ||
|  | 33d2df46a0 | ||
|  | 2df308f649 | ||
|  | 92def28bd3 | ||
| ca10bfa1c7 | |||
| 298a6ec51e | |||
|  | e5dbe488a6 | ||
|  | 0e27e3847d | ||
|  | 393727b93b | ||
|  | 2b1fcd78c3 | ||
|  | 0a4e0b49a0 | ||
|  | 76af169f05 | ||
|  | 7b89232251 | ||
|  | b5aeae526f | ||
|  | ef0ddd5d04 | ||
|  | 9b73dacf50 | ||
|  | 244b4aa07f | ||
|  | 8cfc7342cd | ||
|  | 15ae317858 | ||
|  | 834f536b5f | ||
|  | c332d9f08b | ||
| cf2923d5dd | |||
|  | 0e4413ddde | ||
| 009ccd581e | |||
|  | 8cd4263974 | ||
|  | d45c868656 | ||
|  | 955a8113de | ||
|  | dbe210dd53 | ||
|  | 86e11743ca | ||
|  | 980e721f6e | ||
|  | e2a0142d87 | ||
| 895244ecc3 | |||
| addeb621a7 | |||
|  | a7fb25adf6 | ||
|  | e947992957 | ||
|  | bb89a82a07 | ||
|  | 2bb374daea | ||
|  | 49ecbc81d4 | ||
|  | 9e5fb52eb9 | 
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -88,6 +88,7 @@ Thumbs.db | |||||||
| # build directory # | # build directory # | ||||||
| ################### | ################### | ||||||
| build*/* | build*/* | ||||||
|  | Documentation/_build | ||||||
|  |  | ||||||
| # IDE related files # | # IDE related files # | ||||||
| ##################### | ##################### | ||||||
|   | |||||||
							
								
								
									
										56
									
								
								.travis.yml
									
									
									
									
									
								
							
							
						
						
									
										56
									
								
								.travis.yml
									
									
									
									
									
								
							| @@ -1,56 +0,0 @@ | |||||||
| language: cpp |  | ||||||
|  |  | ||||||
| cache: |  | ||||||
|   directories: |  | ||||||
|     - clang |  | ||||||
|  |  | ||||||
| matrix: |  | ||||||
|   include: |  | ||||||
|     - os:        osx |  | ||||||
|       osx_image: xcode8.3 |  | ||||||
|       compiler: clang |  | ||||||
|        |  | ||||||
| before_install: |  | ||||||
|     - export GRIDDIR=`pwd` |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]] && [ ! -e clang/bin ]; then wget $CLANG_LINK; tar -xf `basename $CLANG_LINK`; mkdir clang; mv clang+*/* clang/; fi |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi |  | ||||||
|      |  | ||||||
| install: |  | ||||||
|     - export CWD=`pwd` |  | ||||||
|     - echo $CWD |  | ||||||
|     - export CC=$CC$VERSION |  | ||||||
|     - export CXX=$CXX$VERSION |  | ||||||
|     - echo $PATH |  | ||||||
|     - which autoconf |  | ||||||
|     - autoconf  --version |  | ||||||
|     - which automake |  | ||||||
|     - automake  --version |  | ||||||
|     - which $CC |  | ||||||
|     - $CC  --version |  | ||||||
|     - which $CXX |  | ||||||
|     - $CXX --version |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi |  | ||||||
|     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi |  | ||||||
|      |  | ||||||
| script: |  | ||||||
|     - ./bootstrap.sh |  | ||||||
|     - mkdir build |  | ||||||
|     - cd build |  | ||||||
|     - mkdir lime |  | ||||||
|     - cd lime |  | ||||||
|     - mkdir build |  | ||||||
|     - cd build |  | ||||||
|     - wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz |  | ||||||
|     - tar xf lime-1.3.2.tar.gz |  | ||||||
|     - cd lime-1.3.2 |  | ||||||
|     - ./configure --prefix=$CWD/build/lime/install |  | ||||||
|     - make -j4 |  | ||||||
|     - make install |  | ||||||
|     - cd $CWD/build |  | ||||||
|     - ../configure --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF} |  | ||||||
|     - make -j4  |  | ||||||
|     - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals |  | ||||||
|     - make check |  | ||||||
| @@ -442,6 +442,8 @@ public: | |||||||
|     for(int p=0; p<geom.npoint; p++) |     for(int p=0; p<geom.npoint; p++) | ||||||
|       points[p] = geom.points_dagger[p]; |       points[p] = geom.points_dagger[p]; | ||||||
|  |  | ||||||
|  |     auto points_p = &points[0]; | ||||||
|  |  | ||||||
|     RealD* dag_factor_p = &dag_factor[0]; |     RealD* dag_factor_p = &dag_factor[0]; | ||||||
|  |  | ||||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { |     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||||
| @@ -453,7 +455,7 @@ public: | |||||||
|       StencilEntry *SE; |       StencilEntry *SE; | ||||||
|  |  | ||||||
|       for(int p=0;p<geom_v.npoint;p++){ |       for(int p=0;p<geom_v.npoint;p++){ | ||||||
|         int point = points[p]; |         int point = points_p[p]; | ||||||
|  |  | ||||||
| 	SE=Stencil_v.GetEntry(ptype,point,ss); | 	SE=Stencil_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
| @@ -708,6 +710,8 @@ public: | |||||||
|     for(int p=0; p<npoint; p++) |     for(int p=0; p<npoint; p++) | ||||||
|       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; |       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; | ||||||
|  |  | ||||||
|  |     auto points_p = &points[0]; | ||||||
|  |  | ||||||
|     Vector<Aview> AcceleratorViewContainer; |     Vector<Aview> AcceleratorViewContainer; | ||||||
|     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); |     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); | ||||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; |     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||||
| @@ -728,7 +732,7 @@ public: | |||||||
|         StencilEntry *SE; |         StencilEntry *SE; | ||||||
|  |  | ||||||
|         for(int p=0;p<npoint;p++){ |         for(int p=0;p<npoint;p++){ | ||||||
|           int point = points[p]; |           int point = points_p[p]; | ||||||
|           SE=st_v.GetEntry(ptype,point,ss); |           SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|           if(SE->_is_local) { |           if(SE->_is_local) { | ||||||
| @@ -754,7 +758,7 @@ public: | |||||||
|         StencilEntry *SE; |         StencilEntry *SE; | ||||||
|  |  | ||||||
|         for(int p=0;p<npoint;p++){ |         for(int p=0;p<npoint;p++){ | ||||||
|           int point = points[p]; |           int point = points_p[p]; | ||||||
|           SE=st_v.GetEntry(ptype,point,ss); |           SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|           if(SE->_is_local) { |           if(SE->_is_local) { | ||||||
|   | |||||||
| @@ -136,7 +136,7 @@ public: | |||||||
|     flops=0; |     flops=0; | ||||||
|     usec =0; |     usec =0; | ||||||
|     Coordinate layout(Nd,1); |     Coordinate layout(Nd,1); | ||||||
|     sgrid = new GridCartesian(dimensions,layout,processors); |     sgrid = new GridCartesian(dimensions,layout,processors,*grid); | ||||||
|   }; |   }; | ||||||
|      |      | ||||||
|   ~FFT ( void)  { |   ~FFT ( void)  { | ||||||
| @@ -182,7 +182,7 @@ public: | |||||||
|     pencil_gd[dim] = G*processors[dim]; |     pencil_gd[dim] = G*processors[dim]; | ||||||
|        |        | ||||||
|     // Pencil global vol LxLxGxLxL per node |     // Pencil global vol LxLxGxLxL per node | ||||||
|     GridCartesian pencil_g(pencil_gd,layout,processors); |     GridCartesian pencil_g(pencil_gd,layout,processors,*vgrid); | ||||||
|        |        | ||||||
|     // Construct pencils |     // Construct pencils | ||||||
|     typedef typename vobj::scalar_object sobj; |     typedef typename vobj::scalar_object sobj; | ||||||
|   | |||||||
| @@ -530,6 +530,16 @@ public: | |||||||
| template<class Field> class LinearFunction { | template<class Field> class LinearFunction { | ||||||
| public: | public: | ||||||
|   virtual void operator() (const Field &in, Field &out) = 0; |   virtual void operator() (const Field &in, Field &out) = 0; | ||||||
|  |  | ||||||
|  |   virtual void operator() (const std::vector<Field> &in, std::vector<Field> &out) | ||||||
|  |   { | ||||||
|  |     assert(in.size() == out.size()); | ||||||
|  |  | ||||||
|  |     for (unsigned int i = 0; i < in.size(); ++i) | ||||||
|  |     { | ||||||
|  |       (*this)(in[i], out[i]); | ||||||
|  |     } | ||||||
|  |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | ||||||
|   | |||||||
| @@ -54,15 +54,23 @@ class DeflatedGuesser: public LinearFunction<Field> { | |||||||
| private: | private: | ||||||
|   const std::vector<Field> &evec; |   const std::vector<Field> &evec; | ||||||
|   const std::vector<RealD> &eval; |   const std::vector<RealD> &eval; | ||||||
|  |   const unsigned int       N; | ||||||
|  |  | ||||||
| public: | public: | ||||||
|  |  | ||||||
|   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; |   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) | ||||||
|  |   : DeflatedGuesser(_evec, _eval, _evec.size()) | ||||||
|  |   {} | ||||||
|  |  | ||||||
|  |   DeflatedGuesser(const std::vector<Field> & _evec, const std::vector<RealD> & _eval, const unsigned int _N) | ||||||
|  |   : evec(_evec), eval(_eval), N(_N) | ||||||
|  |   { | ||||||
|  |     assert(evec.size()==eval.size()); | ||||||
|  |     assert(N <= evec.size()); | ||||||
|  |   }  | ||||||
|  |  | ||||||
|   virtual void operator()(const Field &src,Field &guess) { |   virtual void operator()(const Field &src,Field &guess) { | ||||||
|     guess = Zero(); |     guess = Zero(); | ||||||
|     assert(evec.size()==eval.size()); |  | ||||||
|     auto N = evec.size(); |  | ||||||
|     for (int i=0;i<N;i++) { |     for (int i=0;i<N;i++) { | ||||||
|       const Field& tmp = evec[i]; |       const Field& tmp = evec[i]; | ||||||
|       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); |       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); | ||||||
|   | |||||||
| @@ -132,6 +132,31 @@ namespace Grid { | |||||||
|       (*this)(_Matrix,in,out,guess); |       (*this)(_Matrix,in,out,guess); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void RedBlackSource(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &src_o)  | ||||||
|  |     { | ||||||
|  |       GridBase *grid = _Matrix.RedBlackGrid(); | ||||||
|  |       Field tmp(grid); | ||||||
|  |       int nblock = in.size(); | ||||||
|  |       for(int b=0;b<nblock;b++){ | ||||||
|  | 	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     // James can write his own deflated guesser | ||||||
|  |     // with optimised code for the inner products | ||||||
|  |     //    RedBlackSolveSplitGrid(); | ||||||
|  |     //    RedBlackSolve(_Matrix,src_o,sol_o);  | ||||||
|  |  | ||||||
|  |     void RedBlackSolution(Matrix &_Matrix, const std::vector<Field> &in, const std::vector<Field> &sol_o, std::vector<Field> &out) | ||||||
|  |     { | ||||||
|  |       GridBase *grid = _Matrix.RedBlackGrid(); | ||||||
|  |       Field tmp(grid); | ||||||
|  |       int nblock = in.size(); | ||||||
|  |       for(int b=0;b<nblock;b++) { | ||||||
|  | 	pickCheckerboard(Even,tmp,in[b]); | ||||||
|  | 	RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     template<class Guesser> |     template<class Guesser> | ||||||
|     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)  |     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)  | ||||||
|     { |     { | ||||||
| @@ -150,24 +175,29 @@ namespace Grid { | |||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Prepare RedBlack source |       // Prepare RedBlack source | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       for(int b=0;b<nblock;b++){ |       RedBlackSource(_Matrix,in,src_o); | ||||||
| 	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | 	//      for(int b=0;b<nblock;b++){ | ||||||
|       } | 	//	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||||
|  | 	//      } | ||||||
|  |        | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Make the guesses |       // Make the guesses | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       if ( subGuess ) guess_save.resize(nblock,grid); |       if ( subGuess ) guess_save.resize(nblock,grid); | ||||||
|  |  | ||||||
|       for(int b=0;b<nblock;b++){ |        | ||||||
|         if(useSolnAsInitGuess) { |       if(useSolnAsInitGuess) { | ||||||
|  |         for(int b=0;b<nblock;b++){ | ||||||
|           pickCheckerboard(Odd, sol_o[b], out[b]); |           pickCheckerboard(Odd, sol_o[b], out[b]); | ||||||
|         } else { |  | ||||||
|           guess(src_o[b],sol_o[b]);  |  | ||||||
|         } |         } | ||||||
|  |       } else { | ||||||
|  |         guess(src_o, sol_o);  | ||||||
|  |       } | ||||||
|  |  | ||||||
| 	if ( subGuess ) {  | 	    if ( subGuess ) {  | ||||||
| 	  guess_save[b] = sol_o[b]; |         for(int b=0;b<nblock;b++){ | ||||||
| 	} |           guess_save[b] = sol_o[b]; | ||||||
|  |         } | ||||||
|       } |       } | ||||||
|       ////////////////////////////////////////////////////////////// |       ////////////////////////////////////////////////////////////// | ||||||
|       // Call the block solver |       // Call the block solver | ||||||
|   | |||||||
| @@ -33,6 +33,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | bool Stencil_force_mpi = true; | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
| // Info that is setup once and indept of cartesian layout | // Info that is setup once and indept of cartesian layout | ||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -35,6 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | extern bool Stencil_force_mpi ; | ||||||
|  |  | ||||||
| class CartesianCommunicator : public SharedMemory { | class CartesianCommunicator : public SharedMemory { | ||||||
|  |  | ||||||
| public:     | public:     | ||||||
|   | |||||||
| @@ -370,7 +370,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|   double off_node_bytes=0.0; |   double off_node_bytes=0.0; | ||||||
|   int tag; |   int tag; | ||||||
|  |  | ||||||
|   if ( gfrom ==MPI_UNDEFINED) { |   if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) { | ||||||
|     tag= dir+from*32; |     tag= dir+from*32; | ||||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); |     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
| @@ -378,12 +378,18 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( gdest == MPI_UNDEFINED ) { |   if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) { | ||||||
|     tag= dir+_processor*32; |     tag= dir+_processor*32; | ||||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); |     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(xrq); |     list.push_back(xrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|  |   } else { | ||||||
|  |     // TODO : make a OMP loop on CPU, call threaded bcopy | ||||||
|  |     void *shm = (void *) this->ShmBufferTranslate(dest,recv); | ||||||
|  |     assert(shm!=NULL); | ||||||
|  |     acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); | ||||||
|  |     acceleratorCopySynchronise(); // MPI prob slower | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicySequential ) { |   if ( CommunicatorPolicy == CommunicatorPolicySequential ) { | ||||||
|   | |||||||
| @@ -35,6 +35,9 @@ Author: Christoph Lehner <christoph@lhnr.de> | |||||||
| #endif | #endif | ||||||
| #ifdef GRID_HIP | #ifdef GRID_HIP | ||||||
| #include <hip/hip_runtime_api.h> | #include <hip/hip_runtime_api.h> | ||||||
|  | #endif | ||||||
|  | #ifdef GRID_SYCl | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid);  | NAMESPACE_BEGIN(Grid);  | ||||||
| @@ -70,6 +73,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm) | |||||||
|   WorldNodes = WorldSize/WorldShmSize; |   WorldNodes = WorldSize/WorldShmSize; | ||||||
|   assert( (WorldNodes * WorldShmSize) == WorldSize ); |   assert( (WorldNodes * WorldShmSize) == WorldSize ); | ||||||
|  |  | ||||||
|  |  | ||||||
|   // FIXME: Check all WorldShmSize are the same ? |   // FIXME: Check all WorldShmSize are the same ? | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////////////////// | ||||||
| @@ -446,7 +450,47 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
| //////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Hugetlbfs mapping intended | // Hugetlbfs mapping intended | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| #if defined(GRID_CUDA) ||defined(GRID_HIP) | #if defined(GRID_CUDA) ||defined(GRID_HIP)  || defined(GRID_SYCL) | ||||||
|  |  | ||||||
|  | //if defined(GRID_SYCL) | ||||||
|  | #if 0 | ||||||
|  | void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||||
|  | { | ||||||
|  |   void * ShmCommBuf ;  | ||||||
|  |   assert(_ShmSetup==1); | ||||||
|  |   assert(_ShmAlloc==0); | ||||||
|  |  | ||||||
|  |   ////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // allocate the pointer array for shared windows for our group | ||||||
|  |   ////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   MPI_Barrier(WorldShmComm); | ||||||
|  |   WorldShmCommBufs.resize(WorldShmSize); | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Each MPI rank should allocate our own buffer | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   ShmCommBuf = acceleratorAllocDevice(bytes); | ||||||
|  |  | ||||||
|  |   if (ShmCommBuf == (void *)NULL ) { | ||||||
|  |     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; | ||||||
|  |     exit(EXIT_FAILURE);   | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||||
|  | 	    << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | ||||||
|  |  | ||||||
|  |   SharedMemoryZero(ShmCommBuf,bytes); | ||||||
|  |  | ||||||
|  |   assert(WorldShmSize == 1); | ||||||
|  |   for(int r=0;r<WorldShmSize;r++){ | ||||||
|  |     WorldShmCommBufs[r] = ShmCommBuf; | ||||||
|  |   } | ||||||
|  |   _ShmAllocBytes=bytes; | ||||||
|  |   _ShmAlloc=1; | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #if defined(GRID_CUDA) ||defined(GRID_HIP) ||defined(GRID_SYCL)   | ||||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||||
| { | { | ||||||
|   void * ShmCommBuf ;  |   void * ShmCommBuf ;  | ||||||
| @@ -470,18 +514,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|   // Each MPI rank should allocate our own buffer |   // Each MPI rank should allocate our own buffer | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   ShmCommBuf = acceleratorAllocDevice(bytes); |   ShmCommBuf = acceleratorAllocDevice(bytes); | ||||||
|  |  | ||||||
|   if (ShmCommBuf == (void *)NULL ) { |   if (ShmCommBuf == (void *)NULL ) { | ||||||
|     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; |     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; | ||||||
|     exit(EXIT_FAILURE);   |     exit(EXIT_FAILURE);   | ||||||
|   } |   } | ||||||
|   //  if ( WorldRank == 0 ){ |   if ( WorldRank == 0 ){ | ||||||
|   if ( 1 ){ |  | ||||||
|     std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  |     std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||||
| 	      << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | 	      << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | ||||||
|   } |   } | ||||||
|   SharedMemoryZero(ShmCommBuf,bytes); |   SharedMemoryZero(ShmCommBuf,bytes); | ||||||
|  |   std::cout<< "Setting up IPC"<<std::endl; | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Loop over ranks/gpu's on our node |   // Loop over ranks/gpu's on our node | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -491,6 +533,29 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|     // If it is me, pass around the IPC access key |     // If it is me, pass around the IPC access key | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|  |     void * thisBuf = ShmCommBuf; | ||||||
|  |     if(!Stencil_force_mpi) { | ||||||
|  | #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||||
|  |     typedef struct { int fd; pid_t pid ; } clone_mem_t; | ||||||
|  |  | ||||||
|  |     auto zeDevice    = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device()); | ||||||
|  |     auto zeContext   = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context()); | ||||||
|  |        | ||||||
|  |     ze_ipc_mem_handle_t ihandle; | ||||||
|  |     clone_mem_t handle; | ||||||
|  |  | ||||||
|  |     if ( r==WorldShmRank ) {  | ||||||
|  |       auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle); | ||||||
|  |       if ( err != ZE_RESULT_SUCCESS ) { | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||||
|  | 	exit(EXIT_FAILURE); | ||||||
|  |       } else { | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||||
|  |       } | ||||||
|  |       memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int)); | ||||||
|  |       handle.pid = getpid(); | ||||||
|  |     } | ||||||
|  | #endif | ||||||
| #ifdef GRID_CUDA | #ifdef GRID_CUDA | ||||||
|     cudaIpcMemHandle_t handle; |     cudaIpcMemHandle_t handle; | ||||||
|     if ( r==WorldShmRank ) {  |     if ( r==WorldShmRank ) {  | ||||||
| @@ -511,6 +576,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|     // Share this IPC handle across the Shm Comm |     // Share this IPC handle across the Shm Comm | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
| @@ -526,7 +592,35 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     // If I am not the source, overwrite thisBuf with remote buffer |     // If I am not the source, overwrite thisBuf with remote buffer | ||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     void * thisBuf = ShmCommBuf; |  | ||||||
|  | #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||||
|  |     if ( r!=WorldShmRank ) { | ||||||
|  |       thisBuf = nullptr; | ||||||
|  |       std::cout<<"mapping seeking remote pid/fd " | ||||||
|  | 	       <<handle.pid<<"/" | ||||||
|  | 	       <<handle.fd<<std::endl; | ||||||
|  |  | ||||||
|  |       int pidfd = syscall(SYS_pidfd_open,handle.pid,0); | ||||||
|  |       std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n"; | ||||||
|  |       //      int myfd  = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0); | ||||||
|  |       int myfd  = syscall(438,pidfd,handle.fd,0); | ||||||
|  |  | ||||||
|  |       std::cout<<"Using IpcHandle myfd "<<myfd<<"\n"; | ||||||
|  |        | ||||||
|  |       memcpy((void *)&ihandle,(void *)&myfd,sizeof(int)); | ||||||
|  |  | ||||||
|  |       auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf); | ||||||
|  |       if ( err != ZE_RESULT_SUCCESS ) { | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl; | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;  | ||||||
|  | 	exit(EXIT_FAILURE); | ||||||
|  |       } else { | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl; | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle pointer is "<<std::hex<<thisBuf<<std::dec<<std::endl; | ||||||
|  |       } | ||||||
|  |       assert(thisBuf!=nullptr); | ||||||
|  |     } | ||||||
|  | #endif | ||||||
| #ifdef GRID_CUDA | #ifdef GRID_CUDA | ||||||
|     if ( r!=WorldShmRank ) {  |     if ( r!=WorldShmRank ) {  | ||||||
|       auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); |       auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); | ||||||
| @@ -548,6 +642,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     // Save a copy of the device buffers |     // Save a copy of the device buffers | ||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|  |     } | ||||||
|     WorldShmCommBufs[r] = thisBuf; |     WorldShmCommBufs[r] = thisBuf; | ||||||
| #else | #else | ||||||
|     WorldShmCommBufs[r] = ShmCommBuf; |     WorldShmCommBufs[r] = ShmCommBuf; | ||||||
| @@ -557,6 +652,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|   _ShmAllocBytes=bytes; |   _ShmAllocBytes=bytes; | ||||||
|   _ShmAlloc=1; |   _ShmAlloc=1; | ||||||
| } | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
| #else  | #else  | ||||||
| #ifdef GRID_MPI3_SHMMMAP | #ifdef GRID_MPI3_SHMMMAP | ||||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||||
| @@ -727,16 +824,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
| ///////////////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////////////// | ||||||
| void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes) | void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes) | ||||||
| { | { | ||||||
| #ifdef GRID_CUDA | #if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL) | ||||||
|   cudaMemset(dest,0,bytes); |   acceleratorMemSet(dest,0,bytes); | ||||||
| #else | #else | ||||||
|   bzero(dest,bytes); |   bzero(dest,bytes); | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
| void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) | void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) | ||||||
| { | { | ||||||
| #ifdef GRID_CUDA | #if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL) | ||||||
|   cudaMemcpy(dest,src,bytes,cudaMemcpyDefault); |   acceleratorCopyToDevice(src,dest,bytes); | ||||||
| #else    | #else    | ||||||
|   bcopy(src,dest,bytes); |   bcopy(src,dest,bytes); | ||||||
| #endif | #endif | ||||||
| @@ -800,7 +897,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) | |||||||
|   } |   } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   SharedMemoryTest(); |   //SharedMemoryTest(); | ||||||
| } | } | ||||||
| ////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////// | ||||||
| // On node barrier | // On node barrier | ||||||
|   | |||||||
| @@ -122,8 +122,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r | |||||||
|   assert(shift<fd); |   assert(shift<fd); | ||||||
|    |    | ||||||
|   int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension]; |   int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension]; | ||||||
|   cshiftVector<vobj> send_buf(buffer_size); |   static cshiftVector<vobj> send_buf; send_buf.resize(buffer_size); | ||||||
|   cshiftVector<vobj> recv_buf(buffer_size); |   static cshiftVector<vobj> recv_buf; recv_buf.resize(buffer_size); | ||||||
|      |      | ||||||
|   int cb= (cbmask==0x2)? Odd : Even; |   int cb= (cbmask==0x2)? Odd : Even; | ||||||
|   int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); |   int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); | ||||||
| @@ -198,8 +198,8 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo | |||||||
|   int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension]; |   int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension]; | ||||||
|   //  int words = sizeof(vobj)/sizeof(vector_type); |   //  int words = sizeof(vobj)/sizeof(vector_type); | ||||||
|  |  | ||||||
|   std::vector<cshiftVector<scalar_object> >  send_buf_extract(Nsimd); |   static std::vector<cshiftVector<scalar_object> >  send_buf_extract; send_buf_extract.resize(Nsimd); | ||||||
|   std::vector<cshiftVector<scalar_object> >  recv_buf_extract(Nsimd); |   static std::vector<cshiftVector<scalar_object> >  recv_buf_extract; recv_buf_extract.resize(Nsimd); | ||||||
|   scalar_object *  recv_buf_extract_mpi; |   scalar_object *  recv_buf_extract_mpi; | ||||||
|   scalar_object *  send_buf_extract_mpi; |   scalar_object *  send_buf_extract_mpi; | ||||||
|   |   | ||||||
| @@ -294,8 +294,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r | |||||||
|   assert(shift<fd); |   assert(shift<fd); | ||||||
|    |    | ||||||
|   int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension]; |   int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension]; | ||||||
|   cshiftVector<vobj> send_buf_v(buffer_size); |   static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size); | ||||||
|   cshiftVector<vobj> recv_buf_v(buffer_size); |   static cshiftVector<vobj> recv_buf_v; recv_buf_v.resize(buffer_size); | ||||||
|   vobj *send_buf; |   vobj *send_buf; | ||||||
|   vobj *recv_buf; |   vobj *recv_buf; | ||||||
|   { |   { | ||||||
| @@ -381,8 +381,8 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo | |||||||
|   int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension]; |   int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension]; | ||||||
|   //  int words = sizeof(vobj)/sizeof(vector_type); |   //  int words = sizeof(vobj)/sizeof(vector_type); | ||||||
|  |  | ||||||
|   std::vector<cshiftVector<scalar_object> >  send_buf_extract(Nsimd); |   static std::vector<cshiftVector<scalar_object> >  send_buf_extract; send_buf_extract.resize(Nsimd); | ||||||
|   std::vector<cshiftVector<scalar_object> >  recv_buf_extract(Nsimd); |   static std::vector<cshiftVector<scalar_object> >  recv_buf_extract; recv_buf_extract.resize(Nsimd); | ||||||
|   scalar_object *  recv_buf_extract_mpi; |   scalar_object *  recv_buf_extract_mpi; | ||||||
|   scalar_object *  send_buf_extract_mpi; |   scalar_object *  send_buf_extract_mpi; | ||||||
|   { |   { | ||||||
|   | |||||||
| @@ -225,7 +225,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> & | |||||||
|   autoView( x_v , x, AcceleratorRead); |   autoView( x_v , x, AcceleratorRead); | ||||||
|   autoView( y_v , y, AcceleratorRead); |   autoView( y_v , y, AcceleratorRead); | ||||||
|   accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ |   accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ | ||||||
|     auto tmp = a*x_v(ss)+y_v(ss); |     auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]); | ||||||
|     coalescedWrite(ret_v[ss],tmp); |     coalescedWrite(ret_v[ss],tmp); | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -125,7 +125,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | |||||||
|  |  | ||||||
| 	for(int k=k0; k<k1; ++k){ | 	for(int k=k0; k<k1; ++k){ | ||||||
| 	  auto tmp = coalescedRead(Bp[ss*nrot+j]); | 	  auto tmp = coalescedRead(Bp[ss*nrot+j]); | ||||||
| 	  coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss])); | 	  coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss])); | ||||||
| 	} | 	} | ||||||
|       }); |       }); | ||||||
|  |  | ||||||
| @@ -134,7 +134,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | |||||||
| 	int jj  =j0+j; | 	int jj  =j0+j; | ||||||
| 	int ss =sj/nrot; | 	int ss =sj/nrot; | ||||||
| 	int sss=ss+s; | 	int sss=ss+s; | ||||||
| 	coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); | 	coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j])); | ||||||
|       }); |       }); | ||||||
|   } |   } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -361,6 +361,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|   // But easily avoided by using double precision fields |   // But easily avoided by using double precision fields | ||||||
|   /////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////// | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_object::scalar_type scalar_type; | ||||||
|   GridBase  *grid = Data.Grid(); |   GridBase  *grid = Data.Grid(); | ||||||
|   assert(grid!=NULL); |   assert(grid!=NULL); | ||||||
|  |  | ||||||
| @@ -419,20 +420,19 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   // sum over nodes. |   // sum over nodes. | ||||||
|   sobj gsum; |  | ||||||
|   for(int t=0;t<fd;t++){ |   for(int t=0;t<fd;t++){ | ||||||
|     int pt = t/ld; // processor plane |     int pt = t/ld; // processor plane | ||||||
|     int lt = t%ld; |     int lt = t%ld; | ||||||
|     if ( pt == grid->_processor_coor[orthogdim] ) { |     if ( pt == grid->_processor_coor[orthogdim] ) { | ||||||
|       gsum=lsSum[lt]; |       result[t]=lsSum[lt]; | ||||||
|     } else { |     } else { | ||||||
|       gsum=Zero(); |       result[t]=Zero(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     grid->GlobalSum(gsum); |  | ||||||
|  |  | ||||||
|     result[t]=gsum; |  | ||||||
|   } |   } | ||||||
|  |   scalar_type * ptr = (scalar_type *) &result[0]; | ||||||
|  |   int words = fd*sizeof(sobj)/sizeof(scalar_type); | ||||||
|  |   grid->GlobalSumVector(ptr, words); | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
|   | |||||||
| @@ -364,16 +364,22 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
|   autoView( coarseData_ , coarseData, AcceleratorWrite); |   autoView( coarseData_ , coarseData, AcceleratorWrite); | ||||||
|   autoView( fineData_   , fineData, AcceleratorRead); |   autoView( fineData_   , fineData, AcceleratorRead); | ||||||
|  |  | ||||||
|  |   auto coarseData_p = &coarseData_[0]; | ||||||
|  |   auto fineData_p = &fineData_[0]; | ||||||
|  |    | ||||||
|   Coordinate fine_rdimensions = fine->_rdimensions; |   Coordinate fine_rdimensions = fine->_rdimensions; | ||||||
|   Coordinate coarse_rdimensions = coarse->_rdimensions; |   Coordinate coarse_rdimensions = coarse->_rdimensions; | ||||||
|  |  | ||||||
|  |   vobj zz = Zero(); | ||||||
|    |    | ||||||
|   accelerator_for(sc,coarse->oSites(),1,{ |   accelerator_for(sc,coarse->oSites(),1,{ | ||||||
|  |  | ||||||
|       // One thread per sub block |       // One thread per sub block | ||||||
|       Coordinate coor_c(_ndimension); |       Coordinate coor_c(_ndimension); | ||||||
|       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate |       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate | ||||||
|       coarseData_[sc]=Zero(); |  | ||||||
|  |  | ||||||
|  |       vobj cd = zz; | ||||||
|  |        | ||||||
|       for(int sb=0;sb<blockVol;sb++){ |       for(int sb=0;sb<blockVol;sb++){ | ||||||
|  |  | ||||||
| 	int sf; | 	int sf; | ||||||
| @@ -383,9 +389,11 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
| 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | ||||||
| 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | ||||||
|  |  | ||||||
| 	coarseData_[sc]=coarseData_[sc]+fineData_[sf]; | 	cd=cd+fineData_p[sf]; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|  |       coarseData_p[sc] = cd; | ||||||
|  |  | ||||||
|     }); |     }); | ||||||
|   return; |   return; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -128,7 +128,7 @@ inline void MachineCharacteristics(FieldMetaData &header) | |||||||
|   std::time_t t = std::time(nullptr); |   std::time_t t = std::time(nullptr); | ||||||
|   std::tm tm_ = *std::localtime(&t); |   std::tm tm_ = *std::localtime(&t); | ||||||
|   std::ostringstream oss;  |   std::ostringstream oss;  | ||||||
|   //      oss << std::put_time(&tm_, "%c %Z"); |   oss << std::put_time(&tm_, "%c %Z"); | ||||||
|   header.creation_date = oss.str(); |   header.creation_date = oss.str(); | ||||||
|   header.archive_date  = header.creation_date; |   header.archive_date  = header.creation_date; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -205,11 +205,20 @@ public: | |||||||
|     std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl; |     std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   // Preferred interface | ||||||
|  |   template<class GaugeStats=PeriodicGaugeStatistics> | ||||||
|  |   static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu, | ||||||
|  | 					std::string file,  | ||||||
|  | 					std::string ens_label = std::string("DWF")) | ||||||
|  |   { | ||||||
|  |     writeConfiguration(Umu,file,0,1,ens_label); | ||||||
|  |   } | ||||||
|   template<class GaugeStats=PeriodicGaugeStatistics> |   template<class GaugeStats=PeriodicGaugeStatistics> | ||||||
|   static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu, |   static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu, | ||||||
| 					std::string file,  | 					std::string file,  | ||||||
| 					int two_row, | 					int two_row, | ||||||
| 					int bits32) | 					int bits32, | ||||||
|  | 					std::string ens_label = std::string("DWF")) | ||||||
|   { |   { | ||||||
|     typedef vLorentzColourMatrixD vobj; |     typedef vLorentzColourMatrixD vobj; | ||||||
|     typedef typename vobj::scalar_object sobj; |     typedef typename vobj::scalar_object sobj; | ||||||
| @@ -219,8 +228,8 @@ public: | |||||||
|     // Following should become arguments |     // Following should become arguments | ||||||
|     /////////////////////////////////////////// |     /////////////////////////////////////////// | ||||||
|     header.sequence_number = 1; |     header.sequence_number = 1; | ||||||
|     header.ensemble_id     = "UKQCD"; |     header.ensemble_id     = std::string("UKQCD"); | ||||||
|     header.ensemble_label  = "DWF"; |     header.ensemble_label  = ens_label; | ||||||
|  |  | ||||||
|     typedef LorentzColourMatrixD fobj3D; |     typedef LorentzColourMatrixD fobj3D; | ||||||
|     typedef LorentzColour2x3D    fobj2D; |     typedef LorentzColour2x3D    fobj2D; | ||||||
| @@ -232,7 +241,7 @@ public: | |||||||
|     GaugeStats Stats; Stats(Umu,header); |     GaugeStats Stats; Stats(Umu,header); | ||||||
|     MachineCharacteristics(header); |     MachineCharacteristics(header); | ||||||
|  |  | ||||||
| 	uint64_t offset; |     uint64_t offset; | ||||||
|  |  | ||||||
|     // Sod it -- always write 3x3 double |     // Sod it -- always write 3x3 double | ||||||
|     header.floating_point = std::string("IEEE64BIG"); |     header.floating_point = std::string("IEEE64BIG"); | ||||||
|   | |||||||
| @@ -115,9 +115,9 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR; | |||||||
| typedef WilsonFermion<WilsonImplF> WilsonFermionF; | typedef WilsonFermion<WilsonImplF> WilsonFermionF; | ||||||
| typedef WilsonFermion<WilsonImplD> WilsonFermionD; | typedef WilsonFermion<WilsonImplD> WilsonFermionD; | ||||||
|  |  | ||||||
| typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; | //typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; | ||||||
| typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; | //typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; | ||||||
| typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; | //typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; | ||||||
|  |  | ||||||
| typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; | typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; | ||||||
| typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; | typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; | ||||||
| @@ -158,41 +158,41 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR; | |||||||
| typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; | typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; | ||||||
| typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; | typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; | ||||||
|  |  | ||||||
| typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; | //typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; | ||||||
| typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; | //typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; | ||||||
| typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; | //typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; | typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; | typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; | typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; | //typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; | //typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; | //typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusFermion<WilsonImplR> MobiusFermionR; | typedef MobiusFermion<WilsonImplR> MobiusFermionR; | ||||||
| typedef MobiusFermion<WilsonImplF> MobiusFermionF; | typedef MobiusFermion<WilsonImplF> MobiusFermionF; | ||||||
| typedef MobiusFermion<WilsonImplD> MobiusFermionD; | typedef MobiusFermion<WilsonImplD> MobiusFermionD; | ||||||
|  |  | ||||||
| typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; | //typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; | ||||||
| typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; | //typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; | ||||||
| typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; | //typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; | typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; | ||||||
| typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; | typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; | ||||||
| typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; | typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; | //typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; | ||||||
| typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; | //typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; | ||||||
| typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; | //typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; | typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; | ||||||
| typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; | typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; | ||||||
| typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; | typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; | ||||||
|  |  | ||||||
| typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; | //typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; | ||||||
| typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; | //typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; | ||||||
| typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; | //typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; | ||||||
|  |  | ||||||
| // Ls vectorised | // Ls vectorised | ||||||
| typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; | typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; | ||||||
| @@ -235,49 +235,49 @@ typedef WilsonFermion<GparityWilsonImplR>     GparityWilsonFermionR; | |||||||
| typedef WilsonFermion<GparityWilsonImplF>     GparityWilsonFermionF; | typedef WilsonFermion<GparityWilsonImplF>     GparityWilsonFermionF; | ||||||
| typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD; | typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD; | ||||||
|  |  | ||||||
| typedef WilsonFermion<GparityWilsonImplRL>     GparityWilsonFermionRL; | //typedef WilsonFermion<GparityWilsonImplRL>     GparityWilsonFermionRL; | ||||||
| typedef WilsonFermion<GparityWilsonImplFH>     GparityWilsonFermionFH; | //typedef WilsonFermion<GparityWilsonImplFH>     GparityWilsonFermionFH; | ||||||
| typedef WilsonFermion<GparityWilsonImplDF>     GparityWilsonFermionDF; | //typedef WilsonFermion<GparityWilsonImplDF>     GparityWilsonFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; | typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; | ||||||
| typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; | typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; | ||||||
| typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; | typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; | ||||||
|  |  | ||||||
| typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; | //typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; | ||||||
| typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; | //typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; | ||||||
| typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; | //typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; | typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; | typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; | typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; | //typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; | //typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; | //typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; | typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; | typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; | typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; | ||||||
|  |  | ||||||
| typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; | //typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; | //typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; | //typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; | typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; | ||||||
| typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; | typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; | ||||||
| typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; | typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; | ||||||
|  |  | ||||||
| typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; | //typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; | ||||||
| typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; | //typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; | ||||||
| typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; | //typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; | typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; | typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; | typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; | //typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; | //typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; | //typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; | typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; | ||||||
| typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; | typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; | ||||||
| @@ -291,12 +291,6 @@ typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR; | |||||||
| typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF; | typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF; | ||||||
| typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD; | typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD; | ||||||
|  |  | ||||||
| #ifndef GRID_CUDA |  | ||||||
| typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplR> ImprovedStaggeredFermionVec5dR; |  | ||||||
| typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplF> ImprovedStaggeredFermionVec5dF; |  | ||||||
| typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermionVec5dD; |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
| //////////////////// | //////////////////// | ||||||
|   | |||||||
| @@ -183,7 +183,8 @@ NAMESPACE_CHECK(ImplStaggered); | |||||||
| ///////////////////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////////////////// | ||||||
| // Single flavour one component spinors with colour index. 5d vec | // Single flavour one component spinors with colour index. 5d vec | ||||||
| ///////////////////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////////////////// | ||||||
| #include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h>  | // Deprecate Vec5d | ||||||
| NAMESPACE_CHECK(ImplStaggered5dVec);   | //#include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h>  | ||||||
|  | //NAMESPACE_CHECK(ImplStaggered5dVec);   | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -327,8 +327,8 @@ typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> Gparit | |||||||
| typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF;  // Float | typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF;  // Float | ||||||
| typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD;  // Double | typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD;  // Double | ||||||
|   |   | ||||||
| typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL;  // Real.. whichever prec | //typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL;  // Real.. whichever prec | ||||||
| typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH;  // Float | //typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH;  // Float | ||||||
| typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF;  // Double | //typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF;  // Double | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|   | |||||||
| @@ -72,19 +72,23 @@ public: | |||||||
|      |      | ||||||
|   StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){}; |   StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){}; | ||||||
|        |        | ||||||
|   static accelerator_inline void multLink(SiteSpinor &phi, |   template<class _Spinor> | ||||||
|  |   static accelerator_inline void multLink(_Spinor &phi, | ||||||
| 		       const SiteDoubledGaugeField &U, | 		       const SiteDoubledGaugeField &U, | ||||||
| 		       const SiteSpinor &chi, | 		       const _Spinor &chi, | ||||||
| 		       int mu) | 		       int mu) | ||||||
|   { |   { | ||||||
|     mult(&phi(), &U(mu), &chi()); |     auto UU = coalescedRead(U(mu)); | ||||||
|  |     mult(&phi(), &UU, &chi()); | ||||||
|   } |   } | ||||||
|   static accelerator_inline void multLinkAdd(SiteSpinor &phi, |   template<class _Spinor> | ||||||
|  |   static accelerator_inline void multLinkAdd(_Spinor &phi, | ||||||
| 			  const SiteDoubledGaugeField &U, | 			  const SiteDoubledGaugeField &U, | ||||||
| 			  const SiteSpinor &chi, | 			  const _Spinor &chi, | ||||||
| 			  int mu) | 			  int mu) | ||||||
|   { |   { | ||||||
|     mac(&phi(), &U(mu), &chi()); |     auto UU = coalescedRead(U(mu)); | ||||||
|  |     mac(&phi(), &UU, &chi()); | ||||||
|   } |   } | ||||||
|        |        | ||||||
|   template <class ref> |   template <class ref> | ||||||
|   | |||||||
| @@ -68,11 +68,12 @@ public: | |||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Compress includes precision change if mpi data is not same */ |   /* Compress includes precision change if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   template<class _SiteHalfSpinor, class _SiteSpinor> |   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||||
|   accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { |     typedef decltype(coalescedRead(buf)) sobj; | ||||||
|     _SiteHalfSpinor tmp; |     sobj sp; | ||||||
|     projector::Proj(tmp,in,mu,dag); |     auto sin = coalescedRead(in); | ||||||
|     vstream(buf[o],tmp); |     projector::Proj(sp,sin,mu,dag); | ||||||
|  |     coalescedWrite(buf,sp); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -82,13 +83,18 @@ public: | |||||||
| 				   const SiteHalfSpinor * __restrict__ vp0, | 				   const SiteHalfSpinor * __restrict__ vp0, | ||||||
| 				   const SiteHalfSpinor * __restrict__ vp1, | 				   const SiteHalfSpinor * __restrict__ vp1, | ||||||
| 				   Integer type,Integer o) const { | 				   Integer type,Integer o) const { | ||||||
|  | #ifdef GRID_SIMT | ||||||
|  |     exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); | ||||||
|  | #else | ||||||
|     SiteHalfSpinor tmp1; |     SiteHalfSpinor tmp1; | ||||||
|     SiteHalfSpinor tmp2; |     SiteHalfSpinor tmp2; | ||||||
|     exchange(tmp1,tmp2,vp0[o],vp1[o],type); |     exchange(tmp1,tmp2,vp0[o],vp1[o],type); | ||||||
|     vstream(mp[2*o  ],tmp1); |     vstream(mp[2*o  ],tmp1); | ||||||
|     vstream(mp[2*o+1],tmp2); |     vstream(mp[2*o+1],tmp2); | ||||||
|  | #endif | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Have a decompression step if mpi data is not same */ |   /* Have a decompression step if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -105,6 +111,28 @@ public: | |||||||
| 					   const SiteSpinor * __restrict__ in, | 					   const SiteSpinor * __restrict__ in, | ||||||
| 					   Integer j,Integer k, Integer m,Integer type) const | 					   Integer j,Integer k, Integer m,Integer type) const | ||||||
|   { |   { | ||||||
|  | #ifdef GRID_SIMT | ||||||
|  |     typedef SiteSpinor vobj; | ||||||
|  |     typedef SiteHalfSpinor hvobj; | ||||||
|  |     typedef decltype(coalescedRead(*in))    sobj; | ||||||
|  |     typedef decltype(coalescedRead(*out0)) hsobj; | ||||||
|  |  | ||||||
|  |     unsigned int Nsimd = vobj::Nsimd(); | ||||||
|  |     unsigned int mask = Nsimd >> (type + 1); | ||||||
|  |     int lane = acceleratorSIMTlane(Nsimd); | ||||||
|  |     int j0 = lane &(~mask); // inner coor zero | ||||||
|  |     int j1 = lane |(mask) ; // inner coor one | ||||||
|  |     const vobj *vp0 = &in[k]; | ||||||
|  |     const vobj *vp1 = &in[m]; | ||||||
|  |     const vobj *vp = (lane&mask) ? vp1:vp0; | ||||||
|  |     auto sa = coalescedRead(*vp,j0); | ||||||
|  |     auto sb = coalescedRead(*vp,j1); | ||||||
|  |     hsobj psa, psb; | ||||||
|  |     projector::Proj(psa,sa,mu,dag); | ||||||
|  |     projector::Proj(psb,sb,mu,dag); | ||||||
|  |     coalescedWrite(out0[j],psa); | ||||||
|  |     coalescedWrite(out1[j],psb); | ||||||
|  | #else | ||||||
|     SiteHalfSpinor temp1, temp2; |     SiteHalfSpinor temp1, temp2; | ||||||
|     SiteHalfSpinor temp3, temp4; |     SiteHalfSpinor temp3, temp4; | ||||||
|     projector::Proj(temp1,in[k],mu,dag); |     projector::Proj(temp1,in[k],mu,dag); | ||||||
| @@ -112,6 +140,7 @@ public: | |||||||
|     exchange(temp3,temp4,temp1,temp2,type); |     exchange(temp3,temp4,temp1,temp2,type); | ||||||
|     vstream(out0[j],temp3); |     vstream(out0[j],temp3); | ||||||
|     vstream(out1[j],temp4); |     vstream(out1[j],temp4); | ||||||
|  | #endif | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -121,6 +150,7 @@ public: | |||||||
|  |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | #if 0 | ||||||
| template<class _HCspinor,class _Hspinor,class _Spinor, class projector> | template<class _HCspinor,class _Hspinor,class _Spinor, class projector> | ||||||
| class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, | class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, | ||||||
| 				typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > | 				typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > | ||||||
| @@ -149,13 +179,23 @@ public: | |||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Compress includes precision change if mpi data is not same */ |   /* Compress includes precision change if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   template<class _SiteHalfSpinor, class _SiteSpinor> |   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||||
|   accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { |     SiteHalfSpinor hsp; | ||||||
|     _SiteHalfSpinor hsp; |  | ||||||
|     SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; |     SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; | ||||||
|     projector::Proj(hsp,in,mu,dag); |     projector::Proj(hsp,in,mu,dag); | ||||||
|     precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); |     precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); | ||||||
|   } |   } | ||||||
|  |   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||||
|  | #ifdef GRID_SIMT | ||||||
|  |     typedef decltype(coalescedRead(buf)) sobj; | ||||||
|  |     sobj sp; | ||||||
|  |     auto sin = coalescedRead(in); | ||||||
|  |     projector::Proj(sp,sin,mu,dag); | ||||||
|  |     coalescedWrite(buf,sp); | ||||||
|  | #else | ||||||
|  |     projector::Proj(buf,in,mu,dag); | ||||||
|  | #endif | ||||||
|  |   } | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Exchange includes precision change if mpi data is not same */ |   /* Exchange includes precision change if mpi data is not same */ | ||||||
| @@ -203,6 +243,7 @@ public: | |||||||
|   accelerator_inline bool DecompressionStep(void) const { return true; } |   accelerator_inline bool DecompressionStep(void) const { return true; } | ||||||
|  |  | ||||||
| }; | }; | ||||||
|  | #endif | ||||||
|  |  | ||||||
| #define DECLARE_PROJ(Projector,Compressor,spProj)			\ | #define DECLARE_PROJ(Projector,Compressor,spProj)			\ | ||||||
|   class Projector {							\ |   class Projector {							\ | ||||||
| @@ -253,33 +294,8 @@ public: | |||||||
|   typedef typename Base::View_type View_type; |   typedef typename Base::View_type View_type; | ||||||
|   typedef typename Base::StencilVector StencilVector; |   typedef typename Base::StencilVector StencilVector; | ||||||
|  |  | ||||||
|   double timer0; |   void ZeroCountersi(void)  {  } | ||||||
|   double timer1; |   void Reporti(int calls)  {  } | ||||||
|   double timer2; |  | ||||||
|   double timer3; |  | ||||||
|   double timer4; |  | ||||||
|   double timer5; |  | ||||||
|   double timer6; |  | ||||||
|   uint64_t callsi; |  | ||||||
|   void ZeroCountersi(void) |  | ||||||
|   { |  | ||||||
|     timer0=0; |  | ||||||
|     timer1=0; |  | ||||||
|     timer2=0; |  | ||||||
|     timer3=0; |  | ||||||
|     timer4=0; |  | ||||||
|     timer5=0; |  | ||||||
|     timer6=0; |  | ||||||
|     callsi=0; |  | ||||||
|   } |  | ||||||
|   void Reporti(int calls) |  | ||||||
|   { |  | ||||||
|     if ( timer0 ) std::cout << GridLogMessage << " timer0 (HaloGatherOpt) " <<timer0/calls <<std::endl; |  | ||||||
|     if ( timer1 ) std::cout << GridLogMessage << " timer1 (Communicate)   " <<timer1/calls <<std::endl; |  | ||||||
|     if ( timer2 ) std::cout << GridLogMessage << " timer2 (CommsMerge )   " <<timer2/calls <<std::endl; |  | ||||||
|     if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " <<timer3/calls <<std::endl; |  | ||||||
|     if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::vector<int> surface_list; |   std::vector<int> surface_list; | ||||||
|  |  | ||||||
| @@ -321,26 +337,18 @@ public: | |||||||
|   { |   { | ||||||
|     std::vector<std::vector<CommsRequest_t> > reqs; |     std::vector<std::vector<CommsRequest_t> > reqs; | ||||||
|     this->HaloExchangeOptGather(source,compress); |     this->HaloExchangeOptGather(source,compress); | ||||||
|     double t1=usecond(); |  | ||||||
|     // Asynchronous MPI calls multidirectional, Isend etc... |     // Asynchronous MPI calls multidirectional, Isend etc... | ||||||
|     // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. |     // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. | ||||||
|     this->Communicate(); |     this->Communicate(); | ||||||
|     double t2=usecond(); timer1 += t2-t1; |  | ||||||
|     this->CommsMerge(compress); |     this->CommsMerge(compress); | ||||||
|     double t3=usecond(); timer2 += t3-t2; |  | ||||||
|     this->CommsMergeSHM(compress); |     this->CommsMergeSHM(compress); | ||||||
|     double t4=usecond(); timer3 += t4-t3; |  | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <class compressor> |   template <class compressor> | ||||||
|   void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)  |   void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)  | ||||||
|   { |   { | ||||||
|     this->Prepare(); |     this->Prepare(); | ||||||
|     double t0=usecond(); |  | ||||||
|     this->HaloGatherOpt(source,compress); |     this->HaloGatherOpt(source,compress); | ||||||
|     double t1=usecond(); |  | ||||||
|     timer0 += t1-t0; |  | ||||||
|     callsi++; |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   template <class compressor> |   template <class compressor> | ||||||
| @@ -352,12 +360,9 @@ public: | |||||||
|     typedef typename compressor::SiteHalfSpinor     SiteHalfSpinor; |     typedef typename compressor::SiteHalfSpinor     SiteHalfSpinor; | ||||||
|     typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; |     typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; | ||||||
|  |  | ||||||
|     this->mpi3synctime_g-=usecond(); |  | ||||||
|     this->_grid->StencilBarrier(); |     this->_grid->StencilBarrier(); | ||||||
|     this->mpi3synctime_g+=usecond(); |  | ||||||
|  |  | ||||||
|     assert(source.Grid()==this->_grid); |     assert(source.Grid()==this->_grid); | ||||||
|     this->halogtime-=usecond(); |  | ||||||
|      |      | ||||||
|     this->u_comm_offset=0; |     this->u_comm_offset=0; | ||||||
|        |        | ||||||
| @@ -393,7 +398,6 @@ public: | |||||||
|     } |     } | ||||||
|     this->face_table_computed=1; |     this->face_table_computed=1; | ||||||
|     assert(this->u_comm_offset==this->_unified_buffer_size); |     assert(this->u_comm_offset==this->_unified_buffer_size); | ||||||
|     this->halogtime+=usecond(); |  | ||||||
|     accelerator_barrier(); |     accelerator_barrier(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -184,18 +184,22 @@ public: | |||||||
|       mat = TraceIndex<SpinIndex>(P);  |       mat = TraceIndex<SpinIndex>(P);  | ||||||
|     } |     } | ||||||
|        |        | ||||||
|     inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){ |     inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds) | ||||||
|  |     { | ||||||
|       for (int mu = 0; mu < Nd; mu++) |       for (int mu = 0; mu < Nd; mu++) | ||||||
|       mat[mu] = PeekIndex<LorentzIndex>(Uds, mu); |       mat[mu] = PeekIndex<LorentzIndex>(Uds, mu); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu) | ||||||
|   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ |   { | ||||||
|        | #undef USE_OLD_INSERT_FORCE     | ||||||
|     int Ls=Btilde.Grid()->_fdimensions[0]; |     int Ls=Btilde.Grid()->_fdimensions[0]; | ||||||
|  |     autoView( mat_v , mat, AcceleratorWrite); | ||||||
|  | #ifdef USE_OLD_INSERT_FORCE     | ||||||
|     GaugeLinkField tmp(mat.Grid()); |     GaugeLinkField tmp(mat.Grid()); | ||||||
|     tmp = Zero(); |     tmp = Zero(); | ||||||
|     { |     { | ||||||
|  |       const int Nsimd = SiteSpinor::Nsimd(); | ||||||
|       autoView( tmp_v , tmp, AcceleratorWrite); |       autoView( tmp_v , tmp, AcceleratorWrite); | ||||||
|       autoView( Btilde_v , Btilde, AcceleratorRead); |       autoView( Btilde_v , Btilde, AcceleratorRead); | ||||||
|       autoView( Atilde_v , Atilde, AcceleratorRead); |       autoView( Atilde_v , Atilde, AcceleratorRead); | ||||||
| @@ -208,6 +212,29 @@ public: | |||||||
| 	}); | 	}); | ||||||
|     } |     } | ||||||
|     PokeIndex<LorentzIndex>(mat,tmp,mu); |     PokeIndex<LorentzIndex>(mat,tmp,mu); | ||||||
|  | #else | ||||||
|  |     { | ||||||
|  |       const int Nsimd = SiteSpinor::Nsimd(); | ||||||
|  |       autoView( Btilde_v , Btilde, AcceleratorRead); | ||||||
|  |       autoView( Atilde_v , Atilde, AcceleratorRead); | ||||||
|  |       accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{ | ||||||
|  | 	  int sU=sss; | ||||||
|  |   	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType; | ||||||
|  |   	  ColorMatrixType sum; | ||||||
|  | 	  zeroit(sum);   | ||||||
|  | 	  for(int s=0;s<Ls;s++){ | ||||||
|  | 	    int sF = s+Ls*sU; | ||||||
|  |   	    for(int spn=0;spn<Ns;spn++){ //sum over spin | ||||||
|  |   	      auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector | ||||||
|  |   	      auto aa = coalescedRead(Atilde_v[sF]()(spn) ); | ||||||
|  | 	      auto op = outerProduct(bb,aa); | ||||||
|  |   	      sum = sum + op; | ||||||
|  | 	    } | ||||||
|  | 	  } | ||||||
|  |   	  coalescedWrite(mat_v[sU](mu)(), sum); | ||||||
|  |       }); | ||||||
|  |     } | ||||||
|  | #endif     | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -216,17 +243,17 @@ typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffReal > WilsonImplR | |||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF;  // Float | typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF;  // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD;  // Double | typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD;  // Double | ||||||
|  |  | ||||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL;  // Real.. whichever prec | //typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL;  // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH;  // Float | //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH;  // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF;  // Double | //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF;  // Double | ||||||
|  |  | ||||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec | typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float | typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double | typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double | ||||||
|  |  | ||||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec | //typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float | //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double | //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double | ||||||
|   |   | ||||||
| typedef WilsonImpl<vComplex,  AdjointRepresentation, CoeffReal > WilsonAdjImplR;   // Real.. whichever prec | typedef WilsonImpl<vComplex,  AdjointRepresentation, CoeffReal > WilsonAdjImplR;   // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF;  // Float | typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF;  // Float | ||||||
|   | |||||||
| @@ -880,17 +880,29 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   std::vector<RealD> G_s(Ls,1.0); |   std::vector<RealD> G_s(Ls,1.0); | ||||||
|  |   RealD sign = 1; // sign flip for vector/tadpole | ||||||
|   if ( curr_type == Current::Axial ) { |   if ( curr_type == Current::Axial ) { | ||||||
|     for(int s=0;s<Ls/2;s++){ |     for(int s=0;s<Ls/2;s++){ | ||||||
|       G_s[s] = -1.0; |       G_s[s] = -1.0; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |   else if ( curr_type == Current::Tadpole ) { | ||||||
|  |     auto b=this->_b; | ||||||
|  |     auto c=this->_c; | ||||||
|  |     if ( b == 1 && c == 0 ) { | ||||||
|  |       sign = -1;     | ||||||
|  |     } | ||||||
|  |     else { | ||||||
|  |       std::cerr << "Error: Tadpole implementation currently unavailable for non-Shamir actions." << std::endl; | ||||||
|  |       assert(b==1 && c==0); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|   for(int s=0;s<Ls;s++){ |   for(int s=0;s<Ls;s++){ | ||||||
|  |  | ||||||
|     int sp = (s+1)%Ls; |     int sp = (s+1)%Ls; | ||||||
|     int sr = Ls-1-s; |     //    int sr = Ls-1-s; | ||||||
|     int srp= (sr+1)%Ls; |     //    int srp= (sr+1)%Ls; | ||||||
|  |  | ||||||
|     // Mobius parameters |     // Mobius parameters | ||||||
|     auto b=this->bs[s]; |     auto b=this->bs[s]; | ||||||
| @@ -907,7 +919,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, | |||||||
|  |  | ||||||
|     tmp    = Cshift(tmp,mu,1); |     tmp    = Cshift(tmp,mu,1); | ||||||
|     Impl::multLinkField(Utmp,this->Umu,tmp,mu); |     Impl::multLinkField(Utmp,this->Umu,tmp,mu); | ||||||
|     tmp    = G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop |     tmp    = sign*G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop | ||||||
|     tmp    = where((lcoor>=tmin),tmp,zz); // Mask the time  |     tmp    = where((lcoor>=tmin),tmp,zz); // Mask the time  | ||||||
|     L_Q    = where((lcoor<=tmax),tmp,zz); // Position of current complicated |     L_Q    = where((lcoor<=tmax),tmp,zz); // Position of current complicated | ||||||
|  |  | ||||||
|   | |||||||
| @@ -680,7 +680,8 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilView &st, | |||||||
|   gauge2 =(uint64_t)&UU[sU]( Z );				\ |   gauge2 =(uint64_t)&UU[sU]( Z );				\ | ||||||
|   gauge3 =(uint64_t)&UU[sU]( T );  |   gauge3 =(uint64_t)&UU[sU]( T );  | ||||||
|    |    | ||||||
|  | #undef STAG_VEC5D | ||||||
|  | #ifdef STAG_VEC5D | ||||||
|   // This is the single precision 5th direction vectorised kernel |   // This is the single precision 5th direction vectorised kernel | ||||||
| #include <Grid/simd/Intel512single.h> | #include <Grid/simd/Intel512single.h> | ||||||
| template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st, | template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st, | ||||||
| @@ -790,7 +791,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilView | |||||||
| #endif | #endif | ||||||
| } | } | ||||||
|     |     | ||||||
|     | #endif    | ||||||
|  |  | ||||||
|  |  | ||||||
| #define PERMUTE_DIR3 __asm__ (	\ | #define PERMUTE_DIR3 __asm__ (	\ | ||||||
|   | |||||||
| @@ -32,25 +32,50 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| #define LOAD_CHI(b)		\ | #ifdef GRID_SIMT | ||||||
|  |  | ||||||
|  | #define LOAD_CHI(ptype,b)			\ | ||||||
|  |   const SiteSpinor & ref (b[offset]);				\ | ||||||
|  |   Chi_0=coalescedReadPermute<ptype>(ref()()(0),perm,lane);	\ | ||||||
|  |   Chi_1=coalescedReadPermute<ptype>(ref()()(1),perm,lane);	\ | ||||||
|  |   Chi_2=coalescedReadPermute<ptype>(ref()()(2),perm,lane); | ||||||
|  |  | ||||||
|  | #define LOAD_CHI_COMMS(b)		\ | ||||||
|   const SiteSpinor & ref (b[offset]);	\ |   const SiteSpinor & ref (b[offset]);	\ | ||||||
|     Chi_0=ref()()(0);\ |   Chi_0=coalescedRead(ref()()(0),lane);	\ | ||||||
|     Chi_1=ref()()(1);\ |   Chi_1=coalescedRead(ref()()(1),lane);	\ | ||||||
|     Chi_2=ref()()(2); |   Chi_2=coalescedRead(ref()()(2),lane); | ||||||
|  |  | ||||||
|  | #define PERMUTE_DIR(dir)	; | ||||||
|  | #else | ||||||
|  | #define LOAD_CHI(ptype,b)      LOAD_CHI_COMMS(b) | ||||||
|  |  | ||||||
|  | #define LOAD_CHI_COMMS(b)		\ | ||||||
|  |   const SiteSpinor & ref (b[offset]);	\ | ||||||
|  |   Chi_0=ref()()(0);			\ | ||||||
|  |   Chi_1=ref()()(1);			\ | ||||||
|  |   Chi_2=ref()()(2); | ||||||
|  |  | ||||||
|  | #define PERMUTE_DIR(dir)			\ | ||||||
|  |   permute##dir(Chi_0,Chi_0);			\ | ||||||
|  |   permute##dir(Chi_1,Chi_1);			\ | ||||||
|  |   permute##dir(Chi_2,Chi_2); | ||||||
|  |  | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
| // To splat or not to splat depends on the implementation | // To splat or not to splat depends on the implementation | ||||||
| #define MULT(A,UChi)				\ | #define MULT(A,UChi)				\ | ||||||
|   auto & ref(U[sU](A));			\ |   auto & ref(U[sU](A));			\ | ||||||
|    Impl::loadLinkElement(U_00,ref()(0,0));      \ |     U_00=coalescedRead(ref()(0,0),lane);				\ | ||||||
|    Impl::loadLinkElement(U_10,ref()(1,0));      \ |     U_10=coalescedRead(ref()(1,0),lane);				\ | ||||||
|    Impl::loadLinkElement(U_20,ref()(2,0));      \ |     U_20=coalescedRead(ref()(2,0),lane);				\ | ||||||
|    Impl::loadLinkElement(U_01,ref()(0,1));      \ |     U_01=coalescedRead(ref()(0,1),lane);				\ | ||||||
|    Impl::loadLinkElement(U_11,ref()(1,1));      \ |     U_11=coalescedRead(ref()(1,1),lane);				\ | ||||||
|    Impl::loadLinkElement(U_21,ref()(2,1));      \ |     U_21=coalescedRead(ref()(2,1),lane);				\ | ||||||
|    Impl::loadLinkElement(U_02,ref()(0,2));     \ |     U_02=coalescedRead(ref()(0,2),lane);				\ | ||||||
|    Impl::loadLinkElement(U_12,ref()(1,2));     \ |     U_12=coalescedRead(ref()(1,2),lane);				\ | ||||||
|    Impl::loadLinkElement(U_22,ref()(2,2));     \ |     U_22=coalescedRead(ref()(2,2),lane);				\ | ||||||
|     UChi ## _0  = U_00*Chi_0;	       \ |     UChi ## _0  = U_00*Chi_0;	       \ | ||||||
|     UChi ## _1  = U_10*Chi_0;\ |     UChi ## _1  = U_10*Chi_0;\ | ||||||
|     UChi ## _2  = U_20*Chi_0;\ |     UChi ## _2  = U_20*Chi_0;\ | ||||||
| @@ -63,15 +88,15 @@ NAMESPACE_BEGIN(Grid); | |||||||
|  |  | ||||||
| #define MULT_ADD(U,A,UChi)			\ | #define MULT_ADD(U,A,UChi)			\ | ||||||
|   auto & ref(U[sU](A));			\ |   auto & ref(U[sU](A));			\ | ||||||
|    Impl::loadLinkElement(U_00,ref()(0,0));      \ |     U_00=coalescedRead(ref()(0,0),lane);				\ | ||||||
|    Impl::loadLinkElement(U_10,ref()(1,0));      \ |     U_10=coalescedRead(ref()(1,0),lane);				\ | ||||||
|    Impl::loadLinkElement(U_20,ref()(2,0));      \ |     U_20=coalescedRead(ref()(2,0),lane);				\ | ||||||
|    Impl::loadLinkElement(U_01,ref()(0,1));      \ |     U_01=coalescedRead(ref()(0,1),lane);				\ | ||||||
|    Impl::loadLinkElement(U_11,ref()(1,1));      \ |     U_11=coalescedRead(ref()(1,1),lane);				\ | ||||||
|    Impl::loadLinkElement(U_21,ref()(2,1));      \ |     U_21=coalescedRead(ref()(2,1),lane);				\ | ||||||
|    Impl::loadLinkElement(U_02,ref()(0,2));     \ |     U_02=coalescedRead(ref()(0,2),lane);				\ | ||||||
|    Impl::loadLinkElement(U_12,ref()(1,2));     \ |     U_12=coalescedRead(ref()(1,2),lane);				\ | ||||||
|    Impl::loadLinkElement(U_22,ref()(2,2));     \ |     U_22=coalescedRead(ref()(2,2),lane);				\ | ||||||
|     UChi ## _0 += U_00*Chi_0;	       \ |     UChi ## _0 += U_00*Chi_0;	       \ | ||||||
|     UChi ## _1 += U_10*Chi_0;\ |     UChi ## _1 += U_10*Chi_0;\ | ||||||
|     UChi ## _2 += U_20*Chi_0;\ |     UChi ## _2 += U_20*Chi_0;\ | ||||||
| @@ -83,24 +108,18 @@ NAMESPACE_BEGIN(Grid); | |||||||
|     UChi ## _2 += U_22*Chi_2; |     UChi ## _2 += U_22*Chi_2; | ||||||
|  |  | ||||||
|  |  | ||||||
| #define PERMUTE_DIR(dir)			\ |  | ||||||
|   permute##dir(Chi_0,Chi_0);			\ |  | ||||||
|   permute##dir(Chi_1,Chi_1);			\ |  | ||||||
|   permute##dir(Chi_2,Chi_2); |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #define HAND_STENCIL_LEG_BASE(Dir,Perm,skew)	\ | #define HAND_STENCIL_LEG_BASE(Dir,Perm,skew)	\ | ||||||
|   SE=st.GetEntry(ptype,Dir+skew,sF);	\ |   SE=st.GetEntry(ptype,Dir+skew,sF);	\ | ||||||
|   offset = SE->_offset;			\ |   offset = SE->_offset;			\ | ||||||
|   local  = SE->_is_local;		\ |   local  = SE->_is_local;		\ | ||||||
|   perm   = SE->_permute;		\ |   perm   = SE->_permute;		\ | ||||||
|   if ( local ) {						\ |   if ( local ) {						\ | ||||||
|     LOAD_CHI(in);					\ |     LOAD_CHI(Perm,in);						\ | ||||||
|     if ( perm) {						\ |     if ( perm) {						\ | ||||||
|       PERMUTE_DIR(Perm);					\ |       PERMUTE_DIR(Perm);					\ | ||||||
|     }								\ |     }								\ | ||||||
|   } else {							\ |   } else {							\ | ||||||
|     LOAD_CHI(buf);						\ |     LOAD_CHI_COMMS(buf);					\ | ||||||
|   }								 |   }								 | ||||||
|  |  | ||||||
| #define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even)		\ | #define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even)		\ | ||||||
| @@ -116,19 +135,18 @@ NAMESPACE_BEGIN(Grid); | |||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even)	\ | #define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even)	\ | ||||||
|   SE=st.GetEntry(ptype,Dir+skew,sF);			\ |   SE=st.GetEntry(ptype,Dir+skew,sF);			\ | ||||||
|   offset = SE->_offset;					\ |   offset = SE->_offset;					\ | ||||||
|   local  = SE->_is_local;				\ |   local  = SE->_is_local;				\ | ||||||
|   perm   = SE->_permute;				\ |   perm   = SE->_permute;				\ | ||||||
|   if ( local ) {					\ |   if ( local ) {					\ | ||||||
|     LOAD_CHI(in);				\ |     LOAD_CHI(Perm,in);					\ | ||||||
|     if ( perm) {					\ |     if ( perm) {					\ | ||||||
|       PERMUTE_DIR(Perm);				\ |       PERMUTE_DIR(Perm);				\ | ||||||
|     }							\ |     }							\ | ||||||
|   } else if ( st.same_node[Dir] ) {			\ |   } else if ( st.same_node[Dir] ) {			\ | ||||||
|     LOAD_CHI(buf);					\ |     LOAD_CHI_COMMS(buf);				\ | ||||||
|   }							\ |   }							\ | ||||||
|   if (local || st.same_node[Dir] ) {		\ |   if (local || st.same_node[Dir] ) {		\ | ||||||
|     MULT_ADD(U,Dir,even);				\ |     MULT_ADD(U,Dir,even);				\ | ||||||
| @@ -140,10 +158,32 @@ NAMESPACE_BEGIN(Grid); | |||||||
|   local  = SE->_is_local;				\ |   local  = SE->_is_local;				\ | ||||||
|   if ((!local) && (!st.same_node[Dir]) ) {		\ |   if ((!local) && (!st.same_node[Dir]) ) {		\ | ||||||
|     nmu++;							\ |     nmu++;							\ | ||||||
|     { LOAD_CHI(buf);	  }					\ |     { LOAD_CHI_COMMS(buf);	  }				\ | ||||||
|     { MULT_ADD(U,Dir,even); }					\ |     { MULT_ADD(U,Dir,even); }					\ | ||||||
|   }								 |   }								 | ||||||
|  |  | ||||||
|  | #define HAND_DECLARATIONS(Simd) \ | ||||||
|  |   Simd even_0;			\ | ||||||
|  |   Simd even_1;			\ | ||||||
|  |   Simd even_2;			\ | ||||||
|  |   Simd odd_0;			\ | ||||||
|  |   Simd odd_1;			\ | ||||||
|  |   Simd odd_2;		        \ | ||||||
|  | 		      		\ | ||||||
|  |   Simd Chi_0;			\ | ||||||
|  |   Simd Chi_1;			\ | ||||||
|  |   Simd Chi_2;			\ | ||||||
|  | 				\ | ||||||
|  |   Simd U_00;			\ | ||||||
|  |   Simd U_10;			\ | ||||||
|  |   Simd U_20;			\ | ||||||
|  |   Simd U_01;			\ | ||||||
|  |   Simd U_11;			\ | ||||||
|  |   Simd U_21;			\ | ||||||
|  |   Simd U_02;			\ | ||||||
|  |   Simd U_12;			\ | ||||||
|  |   Simd U_22;			 | ||||||
|  |    | ||||||
|  |  | ||||||
| template <class Impl> | template <class Impl> | ||||||
| template <int Naik> accelerator_inline | template <int Naik> accelerator_inline | ||||||
| @@ -155,28 +195,14 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st, | |||||||
|   typedef typename Simd::scalar_type S; |   typedef typename Simd::scalar_type S; | ||||||
|   typedef typename Simd::vector_type V; |   typedef typename Simd::vector_type V; | ||||||
|  |  | ||||||
|   Simd even_0; // 12 regs on knc |  | ||||||
|   Simd even_1; |  | ||||||
|   Simd even_2; |  | ||||||
|   Simd odd_0; // 12 regs on knc |  | ||||||
|   Simd odd_1; |  | ||||||
|   Simd odd_2; |  | ||||||
|  |  | ||||||
|   Simd Chi_0;    // two spinor; 6 regs |   const int Nsimd = SiteHalfSpinor::Nsimd(); | ||||||
|   Simd Chi_1; |   const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|   Simd Chi_2; |   typedef decltype( coalescedRead( in[0]()()(0) )) Simt; | ||||||
|    |   HAND_DECLARATIONS(Simt); | ||||||
|   Simd U_00;  // two rows of U matrix |  | ||||||
|   Simd U_10; |  | ||||||
|   Simd U_20;   |  | ||||||
|   Simd U_01; |  | ||||||
|   Simd U_11; |  | ||||||
|   Simd U_21;  // 2 reg left. |  | ||||||
|   Simd U_02; |  | ||||||
|   Simd U_12; |  | ||||||
|   Simd U_22;  |  | ||||||
|  |  | ||||||
|   SiteSpinor result; |   typedef decltype( coalescedRead( in[0] )) calcSiteSpinor; | ||||||
|  |   calcSiteSpinor result; | ||||||
|   int offset,local,perm, ptype; |   int offset,local,perm, ptype; | ||||||
|  |  | ||||||
|   StencilEntry *SE; |   StencilEntry *SE; | ||||||
| @@ -215,7 +241,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st, | |||||||
|       result()()(1) = even_1 + odd_1; |       result()()(1) = even_1 + odd_1; | ||||||
|       result()()(2) = even_2 + odd_2; |       result()()(2) = even_2 + odd_2; | ||||||
|     } |     } | ||||||
|     vstream(out[sF],result); |     coalescedWrite(out[sF],result); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -230,28 +256,13 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st, | |||||||
|   typedef typename Simd::scalar_type S; |   typedef typename Simd::scalar_type S; | ||||||
|   typedef typename Simd::vector_type V; |   typedef typename Simd::vector_type V; | ||||||
|  |  | ||||||
|   Simd even_0; // 12 regs on knc |   const int Nsimd = SiteHalfSpinor::Nsimd(); | ||||||
|   Simd even_1; |   const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|   Simd even_2; |   typedef decltype( coalescedRead( in[0]()()(0) )) Simt; | ||||||
|   Simd odd_0; // 12 regs on knc |   HAND_DECLARATIONS(Simt); | ||||||
|   Simd odd_1; |  | ||||||
|   Simd odd_2; |  | ||||||
|  |  | ||||||
|   Simd Chi_0;    // two spinor; 6 regs |   typedef decltype( coalescedRead( in[0] )) calcSiteSpinor; | ||||||
|   Simd Chi_1; |   calcSiteSpinor result; | ||||||
|   Simd Chi_2; |  | ||||||
|    |  | ||||||
|   Simd U_00;  // two rows of U matrix |  | ||||||
|   Simd U_10; |  | ||||||
|   Simd U_20;   |  | ||||||
|   Simd U_01; |  | ||||||
|   Simd U_11; |  | ||||||
|   Simd U_21;  // 2 reg left. |  | ||||||
|   Simd U_02; |  | ||||||
|   Simd U_12; |  | ||||||
|   Simd U_22;  |  | ||||||
|  |  | ||||||
|   SiteSpinor result; |  | ||||||
|   int offset, ptype, local, perm; |   int offset, ptype, local, perm; | ||||||
|  |  | ||||||
|   StencilEntry *SE; |   StencilEntry *SE; | ||||||
| @@ -261,8 +272,8 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st, | |||||||
|   //    int sF=s+LLs*sU; |   //    int sF=s+LLs*sU; | ||||||
|   { |   { | ||||||
|  |  | ||||||
|     even_0 = Zero();    even_1 = Zero();    even_2 = Zero(); |     zeroit(even_0);    zeroit(even_1);    zeroit(even_2); | ||||||
|      odd_0 = Zero();     odd_1 = Zero();     odd_2 = Zero(); |     zeroit(odd_0);    zeroit(odd_1);    zeroit(odd_2); | ||||||
|  |  | ||||||
|     skew = 0; |     skew = 0; | ||||||
|     HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);   |     HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);   | ||||||
| @@ -294,7 +305,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st, | |||||||
|       result()()(1) = even_1 + odd_1; |       result()()(1) = even_1 + odd_1; | ||||||
|       result()()(2) = even_2 + odd_2; |       result()()(2) = even_2 + odd_2; | ||||||
|     } |     } | ||||||
|     vstream(out[sF],result); |     coalescedWrite(out[sF],result); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -309,28 +320,13 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st, | |||||||
|   typedef typename Simd::scalar_type S; |   typedef typename Simd::scalar_type S; | ||||||
|   typedef typename Simd::vector_type V; |   typedef typename Simd::vector_type V; | ||||||
|  |  | ||||||
|   Simd even_0; // 12 regs on knc |   const int Nsimd = SiteHalfSpinor::Nsimd(); | ||||||
|   Simd even_1; |   const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|   Simd even_2; |   typedef decltype( coalescedRead( in[0]()()(0) )) Simt; | ||||||
|   Simd odd_0; // 12 regs on knc |   HAND_DECLARATIONS(Simt); | ||||||
|   Simd odd_1; |  | ||||||
|   Simd odd_2; |  | ||||||
|  |  | ||||||
|   Simd Chi_0;    // two spinor; 6 regs |   typedef decltype( coalescedRead( in[0] )) calcSiteSpinor; | ||||||
|   Simd Chi_1; |   calcSiteSpinor result; | ||||||
|   Simd Chi_2; |  | ||||||
|    |  | ||||||
|   Simd U_00;  // two rows of U matrix |  | ||||||
|   Simd U_10; |  | ||||||
|   Simd U_20;   |  | ||||||
|   Simd U_01; |  | ||||||
|   Simd U_11; |  | ||||||
|   Simd U_21;  // 2 reg left. |  | ||||||
|   Simd U_02; |  | ||||||
|   Simd U_12; |  | ||||||
|   Simd U_22;  |  | ||||||
|  |  | ||||||
|   SiteSpinor result; |  | ||||||
|   int offset, ptype, local; |   int offset, ptype, local; | ||||||
|  |  | ||||||
|   StencilEntry *SE; |   StencilEntry *SE; | ||||||
| @@ -340,8 +336,8 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st, | |||||||
|   //    int sF=s+LLs*sU; |   //    int sF=s+LLs*sU; | ||||||
|   { |   { | ||||||
|  |  | ||||||
|     even_0 = Zero();    even_1 = Zero();    even_2 = Zero(); |     zeroit(even_0);    zeroit(even_1);    zeroit(even_2); | ||||||
|      odd_0 = Zero();     odd_1 = Zero();     odd_2 = Zero(); |     zeroit(odd_0);    zeroit(odd_1);    zeroit(odd_2); | ||||||
|     int nmu=0; |     int nmu=0; | ||||||
|     skew = 0; |     skew = 0; | ||||||
|     HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);   |     HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);   | ||||||
| @@ -374,7 +370,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st, | |||||||
| 	result()()(1) = even_1 + odd_1; | 	result()()(1) = even_1 + odd_1; | ||||||
| 	result()()(2) = even_2 + odd_2; | 	result()()(2) = even_2 + odd_2; | ||||||
|       } |       } | ||||||
|       out[sF] = out[sF] + result; |       coalescedWrite(out[sF] , out(sF)+ result); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
| @@ -397,6 +393,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st, | |||||||
| 						     const FermionFieldView &in, FermionFieldView &out, int dag); \ | 						     const FermionFieldView &in, FermionFieldView &out, int dag); \ | ||||||
| */ | */ | ||||||
| #undef LOAD_CHI | #undef LOAD_CHI | ||||||
|  | #undef HAND_DECLARATIONS | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -35,39 +35,32 @@ NAMESPACE_BEGIN(Grid); | |||||||
| #define GENERIC_STENCIL_LEG(U,Dir,skew,multLink)		\ | #define GENERIC_STENCIL_LEG(U,Dir,skew,multLink)		\ | ||||||
|   SE = st.GetEntry(ptype, Dir+skew, sF);			\ |   SE = st.GetEntry(ptype, Dir+skew, sF);			\ | ||||||
|   if (SE->_is_local ) {						\ |   if (SE->_is_local ) {						\ | ||||||
|     if (SE->_permute) {						\ |     int perm= SE->_permute;						\ | ||||||
|       chi_p = χ						\ |     chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\ | ||||||
|       permute(chi,  in[SE->_offset], ptype);			\ |  | ||||||
|     } else {							\ |  | ||||||
|       chi_p = &in[SE->_offset];					\ |  | ||||||
|     }								\ |  | ||||||
|   } else {							\ |   } else {							\ | ||||||
|     chi_p = &buf[SE->_offset];					\ |     chi = coalescedRead(buf[SE->_offset],lane);			\ | ||||||
|   }								\ |   }								\ | ||||||
|   multLink(Uchi, U[sU], *chi_p, Dir);			 |   acceleratorSynchronise();					\ | ||||||
|  |   multLink(Uchi, U[sU], chi, Dir);			 | ||||||
|  |  | ||||||
| #define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink)		\ | #define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink)		\ | ||||||
|   SE = st.GetEntry(ptype, Dir+skew, sF);			\ |   SE = st.GetEntry(ptype, Dir+skew, sF);			\ | ||||||
|   if (SE->_is_local ) {						\ |   if (SE->_is_local ) {						\ | ||||||
|     if (SE->_permute) {						\ |     int perm= SE->_permute;						\ | ||||||
|       chi_p = χ						\ |     chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\ | ||||||
|       permute(chi,  in[SE->_offset], ptype);			\ |  | ||||||
|     } else {							\ |  | ||||||
|       chi_p = &in[SE->_offset];					\ |  | ||||||
|     }								\ |  | ||||||
|   } else if ( st.same_node[Dir] ) {				\ |   } else if ( st.same_node[Dir] ) {				\ | ||||||
|     chi_p = &buf[SE->_offset];					\ |     chi = coalescedRead(buf[SE->_offset],lane);                 \ | ||||||
|   }								\ |   }								\ | ||||||
|   if (SE->_is_local || st.same_node[Dir] ) {			\ |   if (SE->_is_local || st.same_node[Dir] ) {			\ | ||||||
|     multLink(Uchi, U[sU], *chi_p, Dir);				\ |     multLink(Uchi, U[sU], chi, Dir);				\ | ||||||
|   } |   } | ||||||
|  |  | ||||||
| #define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink)		\ | #define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink)		\ | ||||||
|   SE = st.GetEntry(ptype, Dir+skew, sF);			\ |   SE = st.GetEntry(ptype, Dir+skew, sF);			\ | ||||||
|   if ((!SE->_is_local) && (!st.same_node[Dir]) ) {		\ |   if ((!SE->_is_local) && (!st.same_node[Dir]) ) {		\ | ||||||
|     nmu++;							\ |     nmu++;							\ | ||||||
|     chi_p = &buf[SE->_offset];					\ |     chi = coalescedRead(buf[SE->_offset],lane);			\ | ||||||
|     multLink(Uchi, U[sU], *chi_p, Dir);				\ |     multLink(Uchi, U[sU], chi, Dir);				\ | ||||||
|   } |   } | ||||||
|  |  | ||||||
| template <class Impl> | template <class Impl> | ||||||
| @@ -84,12 +77,14 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st, | |||||||
| 					     SiteSpinor *buf, int sF, int sU,  | 					     SiteSpinor *buf, int sF, int sU,  | ||||||
| 					     const FermionFieldView &in, FermionFieldView &out, int dag)  | 					     const FermionFieldView &in, FermionFieldView &out, int dag)  | ||||||
| { | { | ||||||
|   const SiteSpinor *chi_p; |   typedef decltype(coalescedRead(in[0])) calcSpinor; | ||||||
|   SiteSpinor chi; |   calcSpinor chi; | ||||||
|   SiteSpinor Uchi; |   calcSpinor Uchi; | ||||||
|   StencilEntry *SE; |   StencilEntry *SE; | ||||||
|   int ptype; |   int ptype; | ||||||
|   int skew; |   int skew; | ||||||
|  |   const int Nsimd = SiteHalfSpinor::Nsimd(); | ||||||
|  |   const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|  |  | ||||||
|   //  for(int s=0;s<LLs;s++){ |   //  for(int s=0;s<LLs;s++){ | ||||||
|   // |   // | ||||||
| @@ -118,7 +113,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st, | |||||||
|     if ( dag ) {  |     if ( dag ) {  | ||||||
|       Uchi = - Uchi; |       Uchi = - Uchi; | ||||||
|     }  |     }  | ||||||
|     vstream(out[sF], Uchi); |     coalescedWrite(out[sF], Uchi,lane); | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -130,13 +125,16 @@ template <int Naik> accelerator_inline | |||||||
| void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,  | void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,  | ||||||
| 						DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, | 						DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, | ||||||
| 						SiteSpinor *buf, int sF, int sU,  | 						SiteSpinor *buf, int sF, int sU,  | ||||||
| 						const FermionFieldView &in, FermionFieldView &out,int dag) { | 						const FermionFieldView &in, FermionFieldView &out,int dag) | ||||||
|   const SiteSpinor *chi_p; | { | ||||||
|   SiteSpinor chi; |   typedef decltype(coalescedRead(in[0])) calcSpinor; | ||||||
|   SiteSpinor Uchi; |   calcSpinor chi; | ||||||
|  |   calcSpinor Uchi; | ||||||
|   StencilEntry *SE; |   StencilEntry *SE; | ||||||
|   int ptype; |   int ptype; | ||||||
|   int skew ; |   int skew ; | ||||||
|  |   const int Nsimd = SiteHalfSpinor::Nsimd(); | ||||||
|  |   const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|  |  | ||||||
|   //  for(int s=0;s<LLs;s++){ |   //  for(int s=0;s<LLs;s++){ | ||||||
|   //    int sF=LLs*sU+s; |   //    int sF=LLs*sU+s; | ||||||
| @@ -165,7 +163,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st, | |||||||
|     if ( dag ) { |     if ( dag ) { | ||||||
|       Uchi = - Uchi; |       Uchi = - Uchi; | ||||||
|     } |     } | ||||||
|     vstream(out[sF], Uchi); |     coalescedWrite(out[sF], Uchi,lane); | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -178,14 +176,17 @@ template <int Naik> accelerator_inline | |||||||
| void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,  | void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,  | ||||||
| 						DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, | 						DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, | ||||||
| 						SiteSpinor *buf, int sF, int sU, | 						SiteSpinor *buf, int sF, int sU, | ||||||
| 						const FermionFieldView &in, FermionFieldView &out,int dag) { | 						const FermionFieldView &in, FermionFieldView &out,int dag) | ||||||
|   const SiteSpinor *chi_p; | { | ||||||
|   //  SiteSpinor chi; |   typedef decltype(coalescedRead(in[0])) calcSpinor; | ||||||
|   SiteSpinor Uchi; |   calcSpinor chi; | ||||||
|  |   calcSpinor Uchi; | ||||||
|   StencilEntry *SE; |   StencilEntry *SE; | ||||||
|   int ptype; |   int ptype; | ||||||
|   int nmu=0; |   int nmu=0; | ||||||
|   int skew ; |   int skew ; | ||||||
|  |   const int Nsimd = SiteHalfSpinor::Nsimd(); | ||||||
|  |   const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|  |  | ||||||
|   //  for(int s=0;s<LLs;s++){ |   //  for(int s=0;s<LLs;s++){ | ||||||
|   //    int sF=LLs*sU+s; |   //    int sF=LLs*sU+s; | ||||||
| @@ -211,11 +212,12 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st, | |||||||
|     GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd); |     GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd); | ||||||
|     GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd); |     GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd); | ||||||
|     } |     } | ||||||
|     if ( nmu ) {  |     if ( nmu ) { | ||||||
|       if ( dag ) {  |       auto _out = coalescedRead(out[sF],lane); | ||||||
| 	out[sF] = out[sF] - Uchi; |       if ( dag ) { | ||||||
|  | 	coalescedWrite(out[sF], _out-Uchi,lane); | ||||||
|       } else {  |       } else {  | ||||||
| 	out[sF] = out[sF] + Uchi; | 	coalescedWrite(out[sF], _out+Uchi,lane); | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| @@ -261,6 +263,8 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo, | |||||||
|   GridBase *FGrid=in.Grid();   |   GridBase *FGrid=in.Grid();   | ||||||
|   GridBase *UGrid=U.Grid();   |   GridBase *UGrid=U.Grid();   | ||||||
|   typedef StaggeredKernels<Impl> ThisKernel; |   typedef StaggeredKernels<Impl> ThisKernel; | ||||||
|  |   const int Nsimd = SiteHalfSpinor::Nsimd(); | ||||||
|  |   const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|   autoView( UUU_v , UUU, AcceleratorRead); |   autoView( UUU_v , UUU, AcceleratorRead); | ||||||
|   autoView( U_v   ,   U, AcceleratorRead); |   autoView( U_v   ,   U, AcceleratorRead); | ||||||
|   autoView( in_v  ,  in, AcceleratorRead); |   autoView( in_v  ,  in, AcceleratorRead); | ||||||
| @@ -301,6 +305,8 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo, | |||||||
|   GridBase *FGrid=in.Grid();   |   GridBase *FGrid=in.Grid();   | ||||||
|   GridBase *UGrid=U.Grid();   |   GridBase *UGrid=U.Grid();   | ||||||
|   typedef StaggeredKernels<Impl> ThisKernel; |   typedef StaggeredKernels<Impl> ThisKernel; | ||||||
|  |   const int Nsimd = SiteHalfSpinor::Nsimd(); | ||||||
|  |   const int lane=acceleratorSIMTlane(Nsimd); | ||||||
|   autoView( UUU_v ,   U, AcceleratorRead); |   autoView( UUU_v ,   U, AcceleratorRead); | ||||||
|   autoView( U_v   ,   U, AcceleratorRead); |   autoView( U_v   ,   U, AcceleratorRead); | ||||||
|   autoView( in_v  ,  in, AcceleratorRead); |   autoView( in_v  ,  in, AcceleratorRead); | ||||||
|   | |||||||
| @@ -73,17 +73,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -102,17 +102,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -131,17 +131,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -165,17 +165,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -194,17 +194,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -223,17 +223,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -280,17 +280,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -309,17 +309,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -338,17 +338,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| @@ -371,17 +371,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -400,17 +400,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -429,17 +429,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -74,15 +74,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -97,15 +97,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -121,15 +121,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // XYZT vectorised, dag Kernel, single | // XYZT vectorised, dag Kernel, single | ||||||
| @@ -148,15 +148,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -171,15 +171,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -194,15 +194,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| #undef MAYBEPERM | #undef MAYBEPERM | ||||||
| #undef MULT_2SPIN | #undef MULT_2SPIN | ||||||
| @@ -228,14 +228,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeF | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -249,14 +249,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -273,15 +273,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // Ls vectorised, dag Kernel, single | // Ls vectorised, dag Kernel, single | ||||||
| @@ -299,14 +299,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGau | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -320,14 +320,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -341,14 +341,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #endif  // VEC 5D | #endif  // VEC 5D | ||||||
|  |  | ||||||
| @@ -392,14 +392,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -413,14 +413,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -434,14 +434,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // XYZT vectorised, dag Kernel, single | // XYZT vectorised, dag Kernel, single | ||||||
| @@ -459,14 +459,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -480,14 +480,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -501,14 +501,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| #undef MAYBEPERM | #undef MAYBEPERM | ||||||
| #undef MULT_2SPIN | #undef MULT_2SPIN | ||||||
| @@ -533,14 +533,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeF | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -554,14 +554,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -577,14 +577,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // Ls vectorised, dag Kernel, single | // Ls vectorised, dag Kernel, single | ||||||
| @@ -602,14 +602,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGau | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -623,14 +623,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -645,14 +645,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #endif  // VEC 5D | #endif  // VEC 5D | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonCloverFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonKernelsInstantiationGparity.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonTMFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION GparityWilsonImplDF |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonCloverFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonKernelsInstantiationGparity.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonTMFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION GparityWilsonImplFH |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonCloverFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermionInstantiation.cc.master |  | ||||||
| @@ -1,51 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2015, 2020 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Nils Meyer <nils.meyer@ur.de> Regensburg University |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> |  | ||||||
|  |  | ||||||
| #ifndef AVX512 |  | ||||||
| #ifndef QPX |  | ||||||
| #ifndef A64FX |  | ||||||
| #ifndef A64FXFIXEDSIZE |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| #include "impl.h" |  | ||||||
| template class WilsonKernels<IMPLEMENTATION>; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonTMFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION WilsonImplDF |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonCloverFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermionInstantiation.cc.master |  | ||||||
| @@ -1,51 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2015, 2020 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Nils Meyer <nils.meyer@ur.de> Regensburg University |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> |  | ||||||
|  |  | ||||||
| #ifndef AVX512 |  | ||||||
| #ifndef QPX |  | ||||||
| #ifndef A64FX |  | ||||||
| #ifndef A64FXFIXEDSIZE |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| #include "impl.h" |  | ||||||
| template class WilsonKernels<IMPLEMENTATION>; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonTMFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION WilsonImplFH |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1,51 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.cc |  | ||||||
|  |  | ||||||
| Copyright (C) 2015, 2020 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Nils Meyer <nils.meyer@ur.de> Regensburg University |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> |  | ||||||
|  |  | ||||||
| #ifndef AVX512 |  | ||||||
| #ifndef QPX |  | ||||||
| #ifndef A64FX |  | ||||||
| #ifndef A64FXFIXEDSIZE |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| #include "impl.h" |  | ||||||
| template class WilsonKernels<IMPLEMENTATION>; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION ZWilsonImplDF |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION ZWilsonImplFH |  | ||||||
| @@ -9,8 +9,6 @@ STAG5_IMPL_LIST="" | |||||||
| WILSON_IMPL_LIST=" \ | WILSON_IMPL_LIST=" \ | ||||||
| 	   WilsonImplF \ | 	   WilsonImplF \ | ||||||
| 	   WilsonImplD \ | 	   WilsonImplD \ | ||||||
| 	   WilsonImplFH \ |  | ||||||
| 	   WilsonImplDF \ |  | ||||||
| 	   WilsonAdjImplF \ | 	   WilsonAdjImplF \ | ||||||
| 	   WilsonAdjImplD \ | 	   WilsonAdjImplD \ | ||||||
| 	   WilsonTwoIndexSymmetricImplF \ | 	   WilsonTwoIndexSymmetricImplF \ | ||||||
| @@ -18,26 +16,17 @@ WILSON_IMPL_LIST=" \ | |||||||
| 	   WilsonTwoIndexAntiSymmetricImplF \ | 	   WilsonTwoIndexAntiSymmetricImplF \ | ||||||
| 	   WilsonTwoIndexAntiSymmetricImplD \ | 	   WilsonTwoIndexAntiSymmetricImplD \ | ||||||
| 	   GparityWilsonImplF \ | 	   GparityWilsonImplF \ | ||||||
| 	   GparityWilsonImplD \ | 	   GparityWilsonImplD " | ||||||
| 	   GparityWilsonImplFH \ |  | ||||||
| 	   GparityWilsonImplDF" |  | ||||||
|  |  | ||||||
| DWF_IMPL_LIST=" \ | DWF_IMPL_LIST=" \ | ||||||
| 	   WilsonImplF \ | 	   WilsonImplF \ | ||||||
| 	   WilsonImplD \ | 	   WilsonImplD \ | ||||||
| 	   WilsonImplFH \ |  | ||||||
| 	   WilsonImplDF \ |  | ||||||
| 	   ZWilsonImplF \ | 	   ZWilsonImplF \ | ||||||
| 	   ZWilsonImplD \ | 	   ZWilsonImplD " | ||||||
| 	   ZWilsonImplFH \ |  | ||||||
| 	   ZWilsonImplDF " |  | ||||||
|  |  | ||||||
| GDWF_IMPL_LIST=" \ | GDWF_IMPL_LIST=" \ | ||||||
| 	   GparityWilsonImplF \ | 	   GparityWilsonImplF \ | ||||||
| 	   GparityWilsonImplD \ | 	   GparityWilsonImplD " | ||||||
| 	   GparityWilsonImplFH \ |  | ||||||
| 	   GparityWilsonImplDF" |  | ||||||
|  |  | ||||||
|  |  | ||||||
| IMPL_LIST="$STAG_IMPL_LIST  $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST" | IMPL_LIST="$STAG_IMPL_LIST  $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST" | ||||||
|  |  | ||||||
|   | |||||||
| @@ -78,6 +78,8 @@ public: | |||||||
|   typedef Lattice<SiteLink>    LinkField;  |   typedef Lattice<SiteLink>    LinkField;  | ||||||
|   typedef Lattice<SiteField>   Field; |   typedef Lattice<SiteField>   Field; | ||||||
|  |  | ||||||
|  |   typedef SU<Nrepresentation> Group; | ||||||
|  |  | ||||||
|   // Guido: we can probably separate the types from the HMC functions |   // Guido: we can probably separate the types from the HMC functions | ||||||
|   // this will create 2 kinds of implementations |   // this will create 2 kinds of implementations | ||||||
|   // probably confusing the users |   // probably confusing the users | ||||||
| @@ -118,7 +120,7 @@ public: | |||||||
|     LinkField Pmu(P.Grid()); |     LinkField Pmu(P.Grid()); | ||||||
|     Pmu = Zero(); |     Pmu = Zero(); | ||||||
|     for (int mu = 0; mu < Nd; mu++) { |     for (int mu = 0; mu < Nd; mu++) { | ||||||
|       SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu); |       Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu); | ||||||
|       RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ; |       RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ; | ||||||
|       Pmu = Pmu*scale; |       Pmu = Pmu*scale; | ||||||
|       PokeIndex<LorentzIndex>(P, Pmu, mu); |       PokeIndex<LorentzIndex>(P, Pmu, mu); | ||||||
| @@ -159,15 +161,15 @@ public: | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { |   static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||||
|     SU<Nc>::HotConfiguration(pRNG, U); |     Group::HotConfiguration(pRNG, U); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { |   static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||||
|     SU<Nc>::TepidConfiguration(pRNG, U); |     Group::TepidConfiguration(pRNG, U); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { |   static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { | ||||||
|     SU<Nc>::ColdConfiguration(pRNG, U); |     Group::ColdConfiguration(pRNG, U); | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -85,21 +85,18 @@ public: | |||||||
|  |  | ||||||
|     std::cout << GridLogDebug << "Stout smearing started\n"; |     std::cout << GridLogDebug << "Stout smearing started\n"; | ||||||
|  |  | ||||||
|     // Smear the configurations |     // C contains the staples multiplied by some rho | ||||||
|  |     u_smr = U ; // set the smeared field to the current gauge field | ||||||
|     SmearBase->smear(C, U); |     SmearBase->smear(C, U); | ||||||
|  |  | ||||||
|     for (int mu = 0; mu < Nd; mu++) { |     for (int mu = 0; mu < Nd; mu++) { | ||||||
|       if( mu == OrthogDim ) |       if( mu == OrthogDim ) continue ; | ||||||
|         tmp = 1.0;  // Don't smear in the orthogonal direction |       // u_smr = exp(iQ_mu)*U_mu apart from Orthogdim | ||||||
|       else { |       Umu = peekLorentz(U, mu); | ||||||
|         tmp = peekLorentz(C, mu); |       tmp = peekLorentz(C, mu); | ||||||
|         Umu = peekLorentz(U, mu); |       iq_mu = Ta( tmp * adj(Umu));   | ||||||
|         iq_mu = Ta( |       exponentiate_iQ(tmp, iq_mu); | ||||||
|                    tmp * |       pokeLorentz(u_smr, tmp * Umu, mu); | ||||||
|                    adj(Umu));  // iq_mu = Ta(Omega_mu) to match the signs with the paper |  | ||||||
|         exponentiate_iQ(tmp, iq_mu); |  | ||||||
|       } |  | ||||||
|       pokeLorentz(u_smr, tmp * Umu, mu);  // u_smr = exp(iQ_mu)*U_mu |  | ||||||
|     } |     } | ||||||
|     std::cout << GridLogDebug << "Stout smearing completed\n"; |     std::cout << GridLogDebug << "Stout smearing completed\n"; | ||||||
|   }; |   }; | ||||||
|   | |||||||
| @@ -40,7 +40,7 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| // Dirac algebra adjoint operator (not in  to overload other adj) | // Dirac algebra adjoint operator (not in  to overload other adj) | ||||||
| accelerator_inline Gamma adj(const Gamma &g) | inline Gamma adj(const Gamma &g) | ||||||
| { | { | ||||||
|   return Gamma (Gamma::adj[g.g]); |   return Gamma (Gamma::adj[g.g]); | ||||||
| } | } | ||||||
| @@ -48,7 +48,7 @@ accelerator_inline Gamma adj(const Gamma &g) | |||||||
|  |  | ||||||
|  |  | ||||||
| // Dirac algebra multiplication operator | // Dirac algebra multiplication operator | ||||||
| accelerator_inline Gamma operator*(const Gamma &g1, const Gamma &g2) | inline Gamma operator*(const Gamma &g1, const Gamma &g2) | ||||||
| { | { | ||||||
|   return Gamma (Gamma::mul[g1.g][g2.g]); |   return Gamma (Gamma::mul[g1.g][g2.g]); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -2,14 +2,11 @@ | |||||||
| 
 | 
 | ||||||
| Grid physics library, www.github.com/paboyle/Grid | Grid physics library, www.github.com/paboyle/Grid | ||||||
| 
 | 
 | ||||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.cc | Source file: ./lib/serialisation/BaseIO.h | ||||||
| 
 | 
 | ||||||
| Copyright (C) 2015, 2020 | Copyright (C) 2015 | ||||||
| 
 | 
 | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Michael Marshall <michael.marshall@ed.ac.uk> | ||||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Nils Meyer <nils.meyer@ur.de> Regensburg University |  | ||||||
| 
 | 
 | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -25,27 +22,14 @@ You should have received a copy of the GNU General Public License along | |||||||
| with this program; if not, write to the Free Software Foundation, Inc., | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
| 
 | 
 | ||||||
| See the full license in the file "LICENSE" in the top level distribution | See the full license in the file "LICENSE" in the top level distribution directory | ||||||
| directory |  | ||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> |  | ||||||
| 
 | 
 | ||||||
| #ifndef AVX512 | #include <Grid/GridCore.h> | ||||||
| #ifndef QPX |  | ||||||
| #ifndef A64FX |  | ||||||
| #ifndef A64FXFIXEDSIZE |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| 
 | 
 | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid) | ||||||
| 
 | 
 | ||||||
| #include "impl.h" | std::uint64_t EigenIO::EigenResizeCounter(0); | ||||||
| template class WilsonKernels<IMPLEMENTATION>; |  | ||||||
| 
 | 
 | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid) | ||||||
| @@ -9,6 +9,7 @@ | |||||||
| Author: Antonin Portelli <antonin.portelli@me.com> | Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  | Author: Michael Marshall <michael.marshall@ed.ac.uk> | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |     This program is free software; you can redistribute it and/or modify | ||||||
|     it under the terms of the GNU General Public License as published by |     it under the terms of the GNU General Public License as published by | ||||||
| @@ -30,6 +31,7 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk> | |||||||
| #ifndef GRID_SERIALISATION_ABSTRACT_READER_H | #ifndef GRID_SERIALISATION_ABSTRACT_READER_H | ||||||
| #define GRID_SERIALISATION_ABSTRACT_READER_H | #define GRID_SERIALISATION_ABSTRACT_READER_H | ||||||
|  |  | ||||||
|  | #include <atomic> | ||||||
| #include <type_traits> | #include <type_traits> | ||||||
| #include <Grid/tensors/Tensors.h> | #include <Grid/tensors/Tensors.h> | ||||||
| #include <Grid/serialisation/VectorUtils.h> | #include <Grid/serialisation/VectorUtils.h> | ||||||
| @@ -110,6 +112,10 @@ namespace Grid { | |||||||
|     template <typename ET> |     template <typename ET> | ||||||
|     inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type |     inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type | ||||||
|     getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); } |     getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); } | ||||||
|  |  | ||||||
|  |     // Counter for resized EigenTensors (poor man's substitute for allocator) | ||||||
|  |     // Defined in the BaseIO implementation file (NOTE(review): not BinaryIO.cc - confirm) | ||||||
|  |     extern std::uint64_t EigenResizeCounter; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // Abstract writer/reader classes //////////////////////////////////////////// |   // Abstract writer/reader classes //////////////////////////////////////////// | ||||||
| @@ -497,8 +503,14 @@ namespace Grid { | |||||||
|   typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type |   typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type | ||||||
|   Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims ) |   Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims ) | ||||||
|   { |   { | ||||||
|  | #ifdef GRID_OMP | ||||||
|  |     // The memory counter is the reason this must be done from the primary thread | ||||||
|  |     assert(omp_in_parallel()==0 && "Deserialisation which resizes Eigen tensor must happen from primary thread"); | ||||||
|  | #endif | ||||||
|  |     EigenIO::EigenResizeCounter -= static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar); | ||||||
|     //t.reshape( dims ); |     //t.reshape( dims ); | ||||||
|     t.resize( dims ); |     t.resize( dims ); | ||||||
|  |     EigenIO::EigenResizeCounter += static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   template <typename T> |   template <typename T> | ||||||
|   | |||||||
| @@ -1,8 +1,39 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |   | ||||||
|  |  Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |   | ||||||
|  |  Source file: ./Grid/serialisation/VectorUtils.h | ||||||
|  |   | ||||||
|  |  Copyright (C) 2015 | ||||||
|  |   | ||||||
|  |  Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  Author: Peter Boyle <paboyle@ed.ac.uk> | ||||||
|  |  Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  Author: Michael Marshall <michael.marshall@ed.ac.uk> | ||||||
|  |  | ||||||
|  |  This program is free software; you can redistribute it and/or modify | ||||||
|  |  it under the terms of the GNU General Public License as published by | ||||||
|  |  the Free Software Foundation; either version 2 of the License, or | ||||||
|  |  (at your option) any later version. | ||||||
|  |   | ||||||
|  |  This program is distributed in the hope that it will be useful, | ||||||
|  |  but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  GNU General Public License for more details. | ||||||
|  |   | ||||||
|  |  You should have received a copy of the GNU General Public License along | ||||||
|  |  with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |   | ||||||
|  |  See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |  *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #include <Grid/Grid.h> | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
| using namespace Grid; | using namespace Grid; | ||||||
| #ifndef H5_NO_NAMESPACE | #ifndef H5_NO_NAMESPACE | ||||||
| using namespace H5NS; | using namespace H5NS; // Compile error here? Try adding --enable-cxx to hdf5 configure | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| // Writer implementation /////////////////////////////////////////////////////// | // Writer implementation /////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -1,3 +1,34 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |   | ||||||
|  |  Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |   | ||||||
|  |  Source file: ./Grid/serialisation/VectorUtils.h | ||||||
|  |   | ||||||
|  |  Copyright (C) 2015 | ||||||
|  |   | ||||||
|  |  Author: Peter Boyle <paboyle@ed.ac.uk> | ||||||
|  |  Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  |  Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  Author: Michael Marshall <michael.marshall@ed.ac.uk> | ||||||
|  |  | ||||||
|  |  This program is free software; you can redistribute it and/or modify | ||||||
|  |  it under the terms of the GNU General Public License as published by | ||||||
|  |  the Free Software Foundation; either version 2 of the License, or | ||||||
|  |  (at your option) any later version. | ||||||
|  |   | ||||||
|  |  This program is distributed in the hope that it will be useful, | ||||||
|  |  but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  GNU General Public License for more details. | ||||||
|  |   | ||||||
|  |  You should have received a copy of the GNU General Public License along | ||||||
|  |  with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |   | ||||||
|  |  See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |  *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  |  | ||||||
| #ifndef GRID_SERIALISATION_HDF5_H | #ifndef GRID_SERIALISATION_HDF5_H | ||||||
| #define GRID_SERIALISATION_HDF5_H | #define GRID_SERIALISATION_HDF5_H | ||||||
|  |  | ||||||
| @@ -9,10 +40,6 @@ | |||||||
| #include <Grid/tensors/Tensors.h> | #include <Grid/tensors/Tensors.h> | ||||||
| #include "Hdf5Type.h" | #include "Hdf5Type.h" | ||||||
|  |  | ||||||
| #ifndef H5_NO_NAMESPACE |  | ||||||
| #define H5NS H5 |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| // default threshold above which datasets are used instead of attributes | // default threshold above which datasets are used instead of attributes | ||||||
| #ifndef HDF5_DEF_DATASET_THRES | #ifndef HDF5_DEF_DATASET_THRES | ||||||
| #define HDF5_DEF_DATASET_THRES 6u | #define HDF5_DEF_DATASET_THRES 6u | ||||||
| @@ -34,11 +61,13 @@ namespace Grid | |||||||
|     template <typename U> |     template <typename U> | ||||||
|     void writeDefault(const std::string &s, const U &x); |     void writeDefault(const std::string &s, const U &x); | ||||||
|     template <typename U> |     template <typename U> | ||||||
|     typename std::enable_if<element<std::vector<U>>::is_number, void>::type |     void writeRagged(const std::string &s, const std::vector<U> &x); | ||||||
|  |     template <typename U> | ||||||
|  |     typename std::enable_if<is_flattenable<std::vector<U>>::value>::type | ||||||
|     writeDefault(const std::string &s, const std::vector<U> &x); |     writeDefault(const std::string &s, const std::vector<U> &x); | ||||||
|     template <typename U> |     template <typename U> | ||||||
|     typename std::enable_if<!element<std::vector<U>>::is_number, void>::type |     typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type | ||||||
|     writeDefault(const std::string &s, const std::vector<U> &x); |     writeDefault(const std::string &s, const std::vector<U> &x) { writeRagged(s, x); } | ||||||
|     template <typename U> |     template <typename U> | ||||||
|     void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements); |     void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements); | ||||||
|     H5NS::Group & getGroup(void); |     H5NS::Group & getGroup(void); | ||||||
| @@ -64,11 +93,13 @@ namespace Grid | |||||||
|     template <typename U> |     template <typename U> | ||||||
|     void readDefault(const std::string &s, U &output); |     void readDefault(const std::string &s, U &output); | ||||||
|     template <typename U> |     template <typename U> | ||||||
|     typename std::enable_if<element<std::vector<U>>::is_number, void>::type |     void readRagged(const std::string &s, std::vector<U> &x); | ||||||
|  |     template <typename U> | ||||||
|  |     typename std::enable_if<is_flattenable<std::vector<U>>::value>::type | ||||||
|     readDefault(const std::string &s, std::vector<U> &x); |     readDefault(const std::string &s, std::vector<U> &x); | ||||||
|     template <typename U> |     template <typename U> | ||||||
|     typename std::enable_if<!element<std::vector<U>>::is_number, void>::type |     typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type | ||||||
|     readDefault(const std::string &s, std::vector<U> &x); |     readDefault(const std::string &s, std::vector<U> &x) { readRagged(s, x); } | ||||||
|     template <typename U> |     template <typename U> | ||||||
|     void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim); |     void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim); | ||||||
|     H5NS::Group & getGroup(void); |     H5NS::Group & getGroup(void); | ||||||
| @@ -176,24 +207,30 @@ namespace Grid | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   template <typename U> |   template <typename U> | ||||||
|   typename std::enable_if<element<std::vector<U>>::is_number, void>::type |   typename std::enable_if<is_flattenable<std::vector<U>>::value>::type | ||||||
|   Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x) |   Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x) | ||||||
|   { |   { | ||||||
|     // alias to element type |     if (isRegularShape(x)) | ||||||
|     typedef typename element<std::vector<U>>::type Element; |     { | ||||||
|      |       // alias to element type | ||||||
|     // flatten the vector and getting dimensions |       using Scalar = typename is_flattenable<std::vector<U>>::type; | ||||||
|     Flatten<std::vector<U>> flat(x); |        | ||||||
|     std::vector<size_t> dim; |       // flatten the vector and getting dimensions | ||||||
|     const auto           &flatx = flat.getFlatVector(); |       Flatten<std::vector<U>> flat(x); | ||||||
|     for (auto &d: flat.getDim()) |       std::vector<size_t> dim; | ||||||
|       dim.push_back(d); |       const auto           &flatx = flat.getFlatVector(); | ||||||
|     writeMultiDim<Element>(s, dim, &flatx[0], flatx.size()); |       for (auto &d: flat.getDim()) | ||||||
|  |         dim.push_back(d); | ||||||
|  |       writeMultiDim<Scalar>(s, dim, &flatx[0], flatx.size()); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |       writeRagged(s, x); | ||||||
|  |     } | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <typename U> |   template <typename U> | ||||||
|   typename std::enable_if<!element<std::vector<U>>::is_number, void>::type |   void Hdf5Writer::writeRagged(const std::string &s, const std::vector<U> &x) | ||||||
|   Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x) |  | ||||||
|   { |   { | ||||||
|     push(s); |     push(s); | ||||||
|     writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size", |     writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size", | ||||||
| @@ -229,7 +266,7 @@ namespace Grid | |||||||
|   void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim) |   void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim) | ||||||
|   { |   { | ||||||
|     // alias to element type |     // alias to element type | ||||||
|     typedef typename element<std::vector<U>>::type Element; |     using Scalar = typename is_flattenable<std::vector<U>>::type; | ||||||
|      |      | ||||||
|     // read the dimensions |     // read the dimensions | ||||||
|     H5NS::DataSpace       dataSpace; |     H5NS::DataSpace       dataSpace; | ||||||
| @@ -260,37 +297,44 @@ namespace Grid | |||||||
|       H5NS::DataSet dataSet; |       H5NS::DataSet dataSet; | ||||||
|        |        | ||||||
|       dataSet = group_.openDataSet(s); |       dataSet = group_.openDataSet(s); | ||||||
|       dataSet.read(buf.data(), Hdf5Type<Element>::type()); |       dataSet.read(buf.data(), Hdf5Type<Scalar>::type()); | ||||||
|     } |     } | ||||||
|     else |     else | ||||||
|     { |     { | ||||||
|       H5NS::Attribute attribute; |       H5NS::Attribute attribute; | ||||||
|        |        | ||||||
|       attribute = group_.openAttribute(s); |       attribute = group_.openAttribute(s); | ||||||
|       attribute.read(Hdf5Type<Element>::type(), buf.data()); |       attribute.read(Hdf5Type<Scalar>::type(), buf.data()); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   template <typename U> |   template <typename U> | ||||||
|   typename std::enable_if<element<std::vector<U>>::is_number, void>::type |   typename std::enable_if<is_flattenable<std::vector<U>>::value>::type | ||||||
|   Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x) |   Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x) | ||||||
|   { |   { | ||||||
|     // alias to element type |     if (H5Lexists        (group_.getId(), s.c_str(), H5P_DEFAULT) > 0 | ||||||
|     typedef typename element<std::vector<U>>::type Element; |      && H5Aexists_by_name(group_.getId(), s.c_str(), HDF5_GRID_GUARD "vector_size", H5P_DEFAULT ) > 0) | ||||||
|  |     { | ||||||
|  |       readRagged(s, x); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |       // alias to element type | ||||||
|  |       using Scalar = typename is_flattenable<std::vector<U>>::type; | ||||||
|  |  | ||||||
|     std::vector<size_t>   dim; |       std::vector<size_t>   dim; | ||||||
|     std::vector<Element>  buf; |       std::vector<Scalar>   buf; | ||||||
|     readMultiDim( s, buf, dim ); |       readMultiDim( s, buf, dim ); | ||||||
|  |  | ||||||
|     // reconstruct the multidimensional vector |       // reconstruct the multidimensional vector | ||||||
|     Reconstruct<std::vector<U>> r(buf, dim); |       Reconstruct<std::vector<U>> r(buf, dim); | ||||||
|      |  | ||||||
|     x = r.getVector(); |       x = r.getVector(); | ||||||
|  |     } | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <typename U> |   template <typename U> | ||||||
|   typename std::enable_if<!element<std::vector<U>>::is_number, void>::type |   void Hdf5Reader::readRagged(const std::string &s, std::vector<U> &x) | ||||||
|   Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x) |  | ||||||
|   { |   { | ||||||
|     uint64_t size; |     uint64_t size; | ||||||
|      |      | ||||||
|   | |||||||
| @@ -5,7 +5,9 @@ | |||||||
| #include <complex> | #include <complex> | ||||||
| #include <memory> | #include <memory> | ||||||
|  |  | ||||||
| #ifndef H5_NO_NAMESPACE | #ifdef H5_NO_NAMESPACE | ||||||
|  | #define H5NS | ||||||
|  | #else | ||||||
| #define H5NS H5 | #define H5NS H5 | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   | |||||||
| @@ -118,13 +118,13 @@ static inline std::string SerialisableClassName(void) {return std::string(#cname | |||||||
| static constexpr bool isEnum = false; \ | static constexpr bool isEnum = false; \ | ||||||
| GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\ | GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\ | ||||||
| template <typename T>\ | template <typename T>\ | ||||||
| static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \ | static inline void write(::Grid::Writer<T> &WR,const std::string &s, const cname &obj){ \ | ||||||
|   push(WR,s);\ |   push(WR,s);\ | ||||||
|   GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__))	\ |   GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__))	\ | ||||||
|   pop(WR);\ |   pop(WR);\ | ||||||
| }\ | }\ | ||||||
| template <typename T>\ | template <typename T>\ | ||||||
| static inline void read(Reader<T> &RD,const std::string &s, cname &obj){	\ | static inline void read(::Grid::Reader<T> &RD,const std::string &s, cname &obj){	\ | ||||||
|   if (!push(RD,s))\ |   if (!push(RD,s))\ | ||||||
|   {\ |   {\ | ||||||
|     std::cout << ::Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl; \ |     std::cout << ::Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl; \ | ||||||
|   | |||||||
| @@ -9,7 +9,8 @@ | |||||||
|  Author: Antonin Portelli <antonin.portelli@me.com> |  Author: Antonin Portelli <antonin.portelli@me.com> | ||||||
|  Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  Author: paboyle <paboyle@ph.ed.ac.uk> |  Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|   |  Author: Michael Marshall <michael.marshall@ed.ac.uk> | ||||||
|  |  | ||||||
|  This program is free software; you can redistribute it and/or modify |  This program is free software; you can redistribute it and/or modify | ||||||
|  it under the terms of the GNU General Public License as published by |  it under the terms of the GNU General Public License as published by | ||||||
|  the Free Software Foundation; either version 2 of the License, or |  the Free Software Foundation; either version 2 of the License, or | ||||||
| @@ -236,21 +237,36 @@ namespace Grid { | |||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // Vector element trait //////////////////////////////////////////////////////   |   // is_flattenable<T>::value is true if T is a std::vector<> which can be flattened ////////////////////// | ||||||
|   template <typename T> |   template <typename T, typename V = void> | ||||||
|   struct element |   struct is_flattenable : std::false_type | ||||||
|   { |   { | ||||||
|     typedef T type; |     using type      = T; | ||||||
|     static constexpr bool is_number = false; |     using grid_type = T; | ||||||
|  |     static constexpr int vecRank = 0; | ||||||
|  |     static constexpr bool isGridTensor = false; | ||||||
|  |     static constexpr bool children_flattenable = std::is_arithmetic<T>::value or is_complex<T>::value; | ||||||
|   }; |   }; | ||||||
|    |  | ||||||
|   template <typename T> |   template <typename T> | ||||||
|   struct element<std::vector<T>> |   struct is_flattenable<T, typename std::enable_if<isGridTensor<T>::value>::type> : std::false_type | ||||||
|   { |   { | ||||||
|     typedef typename element<T>::type type; |     using type      = typename GridTypeMapper<T>::scalar_type; | ||||||
|     static constexpr bool is_number = std::is_arithmetic<T>::value |     using grid_type = T; | ||||||
|                                       or is_complex<T>::value |     static constexpr int vecRank = 0; | ||||||
|                                       or element<T>::is_number; |     static constexpr bool isGridTensor = true; | ||||||
|  |     static constexpr bool children_flattenable = true; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   template <typename T> | ||||||
|  |   struct is_flattenable<std::vector<T>, typename std::enable_if<is_flattenable<T>::children_flattenable>::type> | ||||||
|  |   : std::true_type | ||||||
|  |   { | ||||||
|  |     using type      = typename is_flattenable<T>::type; | ||||||
|  |     using grid_type = typename is_flattenable<T>::grid_type; | ||||||
|  |     static constexpr bool isGridTensor = is_flattenable<T>::isGridTensor; | ||||||
|  |     static constexpr int vecRank = is_flattenable<T>::vecRank + 1; | ||||||
|  |     static constexpr bool children_flattenable = true; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   // Vector flattening utility class //////////////////////////////////////////// |   // Vector flattening utility class //////////////////////////////////////////// | ||||||
| @@ -259,23 +275,30 @@ namespace Grid { | |||||||
|   class Flatten |   class Flatten | ||||||
|   { |   { | ||||||
|   public: |   public: | ||||||
|     typedef typename element<V>::type Element; |     using Scalar  = typename is_flattenable<V>::type; | ||||||
|  |     static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor; | ||||||
|   public: |   public: | ||||||
|     explicit                     Flatten(const V &vector); |     explicit                    Flatten(const V &vector); | ||||||
|     const V &                    getVector(void); |     const V &                   getVector(void)     const { return vector_; } | ||||||
|     const std::vector<Element> & getFlatVector(void); |     const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; } | ||||||
|     const std::vector<size_t>  & getDim(void); |     const std::vector<size_t> & getDim(void)        const { return dim_; } | ||||||
|   private: |   private: | ||||||
|     void accumulate(const Element &e); |     template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type | ||||||
|     template <typename W> |     accumulate(const W &e); | ||||||
|     void accumulate(const W &v); |     template <typename W> typename std::enable_if<!is_flattenable<W>::value &&  is_flattenable<W>::isGridTensor>::type | ||||||
|     void accumulateDim(const Element &e); |     accumulate(const W &e); | ||||||
|     template <typename W> |     template <typename W> typename std::enable_if< is_flattenable<W>::value>::type | ||||||
|     void accumulateDim(const W &v); |     accumulate(const W &v); | ||||||
|  |     template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type | ||||||
|  |     accumulateDim(const W &e) {} // Innermost is a scalar - do nothing | ||||||
|  |     template <typename W> typename std::enable_if<!is_flattenable<W>::value &&  is_flattenable<W>::isGridTensor>::type | ||||||
|  |     accumulateDim(const W &e); | ||||||
|  |     template <typename W> typename std::enable_if< is_flattenable<W>::value>::type | ||||||
|  |     accumulateDim(const W &v); | ||||||
|   private: |   private: | ||||||
|     const V              &vector_; |     const V             &vector_; | ||||||
|     std::vector<Element> flatVector_; |     std::vector<Scalar> flatVector_; | ||||||
|     std::vector<size_t>  dim_; |     std::vector<size_t> dim_; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|   // Class to reconstruct a multidimensional std::vector |   // Class to reconstruct a multidimensional std::vector | ||||||
| @@ -283,38 +306,57 @@ namespace Grid { | |||||||
|   class Reconstruct |   class Reconstruct | ||||||
|   { |   { | ||||||
|   public: |   public: | ||||||
|     typedef typename element<V>::type Element; |     using Scalar  = typename is_flattenable<V>::type; | ||||||
|  |     static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor; | ||||||
|   public: |   public: | ||||||
|     Reconstruct(const std::vector<Element> &flatVector, |     Reconstruct(const std::vector<Scalar> &flatVector, | ||||||
|                 const std::vector<size_t> &dim); |                 const std::vector<size_t> &dim); | ||||||
|     const V &                    getVector(void); |     const V &                   getVector(void)     const { return vector_; } | ||||||
|     const std::vector<Element> & getFlatVector(void); |     const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; } | ||||||
|     const std::vector<size_t>  & getDim(void); |     const std::vector<size_t> & getDim(void)        const { return dim_; } | ||||||
|   private: |   private: | ||||||
|     void fill(std::vector<Element> &v); |     template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type | ||||||
|     template <typename W> |     fill(W &v); | ||||||
|     void fill(W &v); |     template <typename W> typename std::enable_if<!is_flattenable<W>::value &&  is_flattenable<W>::isGridTensor>::type | ||||||
|     void resize(std::vector<Element> &v, const unsigned int dim); |     fill(W &v); | ||||||
|     template <typename W> |     template <typename W> typename std::enable_if< is_flattenable<W>::value>::type | ||||||
|     void resize(W &v, const unsigned int dim); |     fill(W &v); | ||||||
|  |     template <typename W> typename std::enable_if< is_flattenable<W>::value &&  is_flattenable<W>::vecRank==1>::type | ||||||
|  |     resize(W &v, const unsigned int dim); | ||||||
|  |     template <typename W> typename std::enable_if< is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type | ||||||
|  |     resize(W &v, const unsigned int dim); | ||||||
|  |     template <typename W> typename std::enable_if<!is_flattenable<W>::isGridTensor>::type | ||||||
|  |     checkInnermost(const W &e) {} // Innermost is a scalar - do nothing | ||||||
|  |     template <typename W> typename std::enable_if< is_flattenable<W>::isGridTensor>::type | ||||||
|  |     checkInnermost(const W &e); | ||||||
|   private: |   private: | ||||||
|     V                          vector_; |     V                         vector_; | ||||||
|     const std::vector<Element> &flatVector_; |     const std::vector<Scalar> &flatVector_; | ||||||
|     std::vector<size_t>        dim_; |     std::vector<size_t>       dim_; | ||||||
|     size_t                     ind_{0}; |     size_t                    ind_{0}; | ||||||
|     unsigned int               dimInd_{0}; |     unsigned int              dimInd_{0}; | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   // Flatten class template implementation |   // Flatten class template implementation | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   void Flatten<V>::accumulate(const Element &e) |   template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type | ||||||
|  |   Flatten<V>::accumulate(const W &e) | ||||||
|   { |   { | ||||||
|     flatVector_.push_back(e); |     flatVector_.push_back(e); | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   template <typename W> |   template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type | ||||||
|   void Flatten<V>::accumulate(const W &v) |   Flatten<V>::accumulate(const W &e) | ||||||
|  |   { | ||||||
|  |     for (const Scalar &x: e) { | ||||||
|  |       flatVector_.push_back(x); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   template <typename V> | ||||||
|  |   template <typename W> typename std::enable_if<is_flattenable<W>::value>::type | ||||||
|  |   Flatten<V>::accumulate(const W &v) | ||||||
|   { |   { | ||||||
|     for (auto &e: v) |     for (auto &e: v) | ||||||
|     { |     { | ||||||
| @@ -323,11 +365,17 @@ namespace Grid { | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   void Flatten<V>::accumulateDim(const Element &e) {}; |   template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type | ||||||
|  |   Flatten<V>::accumulateDim(const W &e) | ||||||
|  |   { | ||||||
|  |     using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>; | ||||||
|  |     for (int rank=0; rank < Traits::Rank; ++rank) | ||||||
|  |       dim_.push_back(Traits::Dimension(rank)); | ||||||
|  |   } | ||||||
|    |    | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   template <typename W> |   template <typename W> typename std::enable_if<is_flattenable<W>::value>::type | ||||||
|   void Flatten<V>::accumulateDim(const W &v) |   Flatten<V>::accumulateDim(const W &v) | ||||||
|   { |   { | ||||||
|     dim_.push_back(v.size()); |     dim_.push_back(v.size()); | ||||||
|     accumulateDim(v[0]); |     accumulateDim(v[0]); | ||||||
| @@ -337,42 +385,36 @@ namespace Grid { | |||||||
|   Flatten<V>::Flatten(const V &vector) |   Flatten<V>::Flatten(const V &vector) | ||||||
|   : vector_(vector) |   : vector_(vector) | ||||||
|   { |   { | ||||||
|     accumulate(vector_); |  | ||||||
|     accumulateDim(vector_); |     accumulateDim(vector_); | ||||||
|   } |     std::size_t TotalSize{ dim_[0] }; | ||||||
|    |     for (int i = 1; i < dim_.size(); ++i) { | ||||||
|   template <typename V> |       TotalSize *= dim_[i]; | ||||||
|   const V & Flatten<V>::getVector(void) |     } | ||||||
|   { |     flatVector_.reserve(TotalSize); | ||||||
|     return vector_; |     accumulate(vector_); | ||||||
|   } |  | ||||||
|    |  | ||||||
|   template <typename V> |  | ||||||
|   const std::vector<typename Flatten<V>::Element> & |  | ||||||
|   Flatten<V>::getFlatVector(void) |  | ||||||
|   { |  | ||||||
|     return flatVector_; |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   template <typename V> |  | ||||||
|   const std::vector<size_t> & Flatten<V>::getDim(void) |  | ||||||
|   { |  | ||||||
|     return dim_; |  | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   // Reconstruct class template implementation |   // Reconstruct class template implementation | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   void Reconstruct<V>::fill(std::vector<Element> &v) |   template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type | ||||||
|  |   Reconstruct<V>::fill(W &v) | ||||||
|  |   { | ||||||
|  |     v = flatVector_[ind_++]; | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   template <typename V> | ||||||
|  |   template <typename W> typename std::enable_if<!is_flattenable<W>::value &&  is_flattenable<W>::isGridTensor>::type | ||||||
|  |   Reconstruct<V>::fill(W &v) | ||||||
|   { |   { | ||||||
|     for (auto &e: v) |     for (auto &e: v) | ||||||
|     { |     { | ||||||
|       e = flatVector_[ind_++]; |       e = flatVector_[ind_++]; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|    |  | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   template <typename W> |   template <typename W> typename std::enable_if<is_flattenable<W>::value>::type | ||||||
|   void Reconstruct<V>::fill(W &v) |   Reconstruct<V>::fill(W &v) | ||||||
|   { |   { | ||||||
|     for (auto &e: v) |     for (auto &e: v) | ||||||
|     { |     { | ||||||
| @@ -381,14 +423,15 @@ namespace Grid { | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   void Reconstruct<V>::resize(std::vector<Element> &v, const unsigned int dim) |   template <typename W> typename std::enable_if<is_flattenable<W>::value && is_flattenable<W>::vecRank==1>::type | ||||||
|  |   Reconstruct<V>::resize(W &v, const unsigned int dim) | ||||||
|   { |   { | ||||||
|     v.resize(dim_[dim]); |     v.resize(dim_[dim]); | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   template <typename W> |   template <typename W> typename std::enable_if<is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type | ||||||
|   void Reconstruct<V>::resize(W &v, const unsigned int dim) |   Reconstruct<V>::resize(W &v, const unsigned int dim) | ||||||
|   { |   { | ||||||
|     v.resize(dim_[dim]); |     v.resize(dim_[dim]); | ||||||
|     for (auto &e: v) |     for (auto &e: v) | ||||||
| @@ -398,34 +441,31 @@ namespace Grid { | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <typename V> |   template <typename V> | ||||||
|   Reconstruct<V>::Reconstruct(const std::vector<Element> &flatVector, |   template <typename W> typename std::enable_if<is_flattenable<W>::isGridTensor>::type | ||||||
|  |   Reconstruct<V>::checkInnermost(const W &) | ||||||
|  |   { | ||||||
|  |     using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>; | ||||||
|  |     const int gridRank{Traits::Rank}; | ||||||
|  |     const int dimRank{static_cast<int>(dim_.size())}; | ||||||
|  |     assert(dimRank >= gridRank && "Tensor rank too low for Grid tensor"); | ||||||
|  |     for (int i=0; i<gridRank; ++i) { | ||||||
|  |       assert(dim_[dimRank - gridRank + i] == Traits::Dimension(i) && "Tensor dimension doesn't match Grid tensor"); | ||||||
|  |     } | ||||||
|  |     dim_.resize(dimRank - gridRank); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   template <typename V> | ||||||
|  |   Reconstruct<V>::Reconstruct(const std::vector<Scalar> &flatVector, | ||||||
|                               const std::vector<size_t> &dim) |                               const std::vector<size_t> &dim) | ||||||
|   : flatVector_(flatVector) |   : flatVector_(flatVector) | ||||||
|   , dim_(dim) |   , dim_(dim) | ||||||
|   { |   { | ||||||
|  |     checkInnermost(vector_); | ||||||
|  |     assert(dim_.size() == is_flattenable<V>::vecRank && "Tensor rank doesn't match nested std::vector rank"); | ||||||
|     resize(vector_, 0); |     resize(vector_, 0); | ||||||
|     fill(vector_); |     fill(vector_); | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <typename V> |  | ||||||
|   const V & Reconstruct<V>::getVector(void) |  | ||||||
|   { |  | ||||||
|     return vector_; |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   template <typename V> |  | ||||||
|   const std::vector<typename Reconstruct<V>::Element> & |  | ||||||
|   Reconstruct<V>::getFlatVector(void) |  | ||||||
|   { |  | ||||||
|     return flatVector_; |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   template <typename V> |  | ||||||
|   const std::vector<size_t> & Reconstruct<V>::getDim(void) |  | ||||||
|   { |  | ||||||
|     return dim_; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Vector IO utilities /////////////////////////////////////////////////////// |   // Vector IO utilities /////////////////////////////////////////////////////// | ||||||
|   // helper function to read space-separated values |   // helper function to read space-separated values | ||||||
|   template <typename T> |   template <typename T> | ||||||
| @@ -459,6 +499,64 @@ namespace Grid { | |||||||
|      |      | ||||||
|     return os; |     return os; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   // In general, scalar types are considered "flattenable" (regularly shaped) | ||||||
|  |   template <typename T> | ||||||
|  |   bool isRegularShapeHelper(const std::vector<T> &, std::vector<std::size_t> &, int, bool) | ||||||
|  |   { | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   template <typename T> | ||||||
|  |   bool isRegularShapeHelper(const std::vector<std::vector<T>> &v, std::vector<std::size_t> &Dims, int Depth, bool bFirst) | ||||||
|  |   { | ||||||
|  |     if( bFirst) | ||||||
|  |     { | ||||||
|  |       assert( Dims.size() == Depth     && "Bug: Delete this message after testing" ); | ||||||
|  |       Dims.push_back(v[0].size()); | ||||||
|  |       if (!Dims[Depth]) | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |       assert( Dims.size() >= Depth + 1 && "Bug: Delete this message after testing" ); | ||||||
|  |     } | ||||||
|  |     for (std::size_t i = 0; i < v.size(); ++i) | ||||||
|  |     { | ||||||
|  |       if (v[i].size() != Dims[Depth] || !isRegularShapeHelper(v[i], Dims, Depth + 1, bFirst && i==0)) | ||||||
|  |       { | ||||||
|  |         return false; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   template <typename T> | ||||||
|  |   bool isRegularShape(const T &t) { return true; } | ||||||
|  |  | ||||||
|  |   template <typename T> | ||||||
|  |   bool isRegularShape(const std::vector<T> &v) { return !v.empty(); } | ||||||
|  |  | ||||||
|  |   // Return non-zero if all dimensions of this std::vector<std::vector<T>> are regularly shaped | ||||||
|  |   template <typename T> | ||||||
|  |   bool isRegularShape(const std::vector<std::vector<T>> &v) | ||||||
|  |   { | ||||||
|  |     if (v.empty() || v[0].empty()) | ||||||
|  |       return false; | ||||||
|  |     // Make sure all of my rows are the same size | ||||||
|  |     std::vector<std::size_t> Dims; | ||||||
|  |     Dims.reserve(is_flattenable<T>::vecRank); | ||||||
|  |     Dims.push_back(v.size()); | ||||||
|  |     Dims.push_back(v[0].size()); | ||||||
|  |     for (std::size_t i = 0; i < Dims[0]; ++i) | ||||||
|  |     { | ||||||
|  |       if (v[i].size() != Dims[1] || !isRegularShapeHelper(v[i], Dims, 2, i==0)) | ||||||
|  |       { | ||||||
|  |         return false; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| // helper function to read space-separated values | // helper function to read space-separated values | ||||||
|   | |||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user