mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 12:04:33 +00:00 
			
		
		
		
	Compare commits
	
		
			290 Commits
		
	
	
		
			feature/se
			...
			feature/dd
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | c04de86d07 | ||
|  | 53e1b00cde | ||
|  | 1257c9f2f0 | ||
|  | 44b0988f9b | ||
|  | 98766851c3 | ||
|  | f36e984278 | ||
|  | c234a7f913 | ||
|  | 0bf0c65b7d | ||
|  | 3355ceea9f | ||
|  | e4cbfe3d4b | ||
|  | 4867e02dfb | ||
|  | ed68cf6268 | ||
|  | 4016e705fc | ||
|  | 2f4e85e5d6 | ||
|  | 8ed0b57b09 | ||
| 6c66b8d997 | |||
| 9523ad3d73 | |||
| 73a95fa96f | |||
|  | 67e08aa952 | ||
|  | ed1f20f3a1 | ||
|  | cffc736bb3 | ||
|  | c0d56a1c04 | ||
|  | 3206f69478 | ||
|  | b2ccaad761 | ||
|  | 8eb1232683 | ||
|  | c6ce3ad03b | ||
|  | b3b033d343 | ||
|  | ca9816bfbb | ||
|  | 814d5abc7e | ||
|  | a29122e2bf | ||
|  | e188c0512e | ||
|  | 1fb6aaf150 | ||
|  | 894654f7ef | ||
|  | 109507888b | ||
|  | 68650b61fe | ||
|  | 7ee66bf453 | ||
|  | 8bd70ad8b5 | ||
|  | af98525766 | ||
|  | 1c2f218519 | ||
|  | c9aa1f507c | ||
|  | ea7126496d | ||
|  | f660dc67e4 | ||
|  | ede8faea74 | ||
|  | 1b750761c2 | ||
|  | 145acf2919 | ||
|  | cc4a27b9e6 | ||
|  | b4690e6091 | ||
|  | 4b24800132 | ||
|  | 9d2238148c | ||
|  | c15493218d | ||
|  | 001a556a34 | ||
|  | 3d0f88e702 | ||
|  | dd091d0960 | ||
|  | e2abbf9520 | ||
|  | c7baeb5bae | ||
|  | 402d80e197 | ||
|  | 86e33c8ab2 | ||
|  | 5dae6a6dac | ||
|  | 361bb8a101 | ||
|  | 7efdb3cd2b | ||
|  | 65ef4ec29f | ||
|  | d5835c0222 | ||
|  | a7b943b33e | ||
|  | 7440cde92f | ||
|  | 0fc662bb24 | ||
|  | 8195890640 | ||
|  | 4c88104a73 | ||
|  | 73b944c152 | ||
|  | d1b0b7f5c6 | ||
|  | 381d8797d0 | ||
|  | b06526bc1e | ||
|  | 3044419111 | ||
|  | bcfa9cf068 | ||
|  | 114920b8de | ||
|  | 0d588b95f4 | ||
|  | 5b3c530aa7 | ||
|  | c6a5499c8b | ||
|  | ec9c3fe77a | ||
|  | 6135ad530e | ||
|  | 40098424c7 | ||
|  | 7163b31a26 | ||
|  | ffbdd91e0e | ||
|  | 7760417312 | ||
|  | 5d29e175d8 | ||
|  | 417dbfa257 | ||
|  | 1eda4d8e0b | ||
|  | 50181f16e5 | ||
|  | 75030637cc | ||
|  | fe5aaf7677 | ||
|  | 80ac2a73ca | ||
| d75a66a3e6 | |||
| fcc4374d7b | |||
| 67c3c16fe5 | |||
| 25e9be50b5 | |||
| 323cf6c038 | |||
|  | 29a22ae603 | ||
|  | 403bff1a47 | ||
|  | c50f27e68b | ||
|  | 80afacec5b | ||
|  | 6cd9224dd7 | ||
|  | 4bf8196ff1 | ||
|  | 4c5440fb06 | ||
|  | 8cc0defb69 | ||
|  | e0c9d01123 | ||
|  | 91bf1df018 | ||
|  | 386a89c668 | ||
|  | 670f4985fd | ||
|  | dcd48a0435 | ||
|  | 87ec14c353 | ||
|  | 4f5ad73501 | ||
|  | 096bb98f78 | ||
|  | 76837ffc65 | ||
|  | 81bd0d7906 | ||
|  | 7d8d250389 | ||
|  | ad406b2c3e | ||
|  | e6366b98a5 | ||
|  | 302356189c | ||
|  | 9394450c1a | ||
|  | 6cf3edef00 | ||
|  | 31cc227dd2 | ||
|  | c690e66325 | ||
|  | 5fdbb924f0 | ||
|  | 6dcaed621c | ||
|  | f9cda24781 | ||
|  | cd5e3fbd82 | ||
|  | f68036c79f | ||
|  | 216c178c16 | ||
|  | 990d976241 | ||
|  | f2fe2573a7 | ||
|  | 11c55a0476 | ||
|  | 1cc706b2f4 | ||
|  | 9e51fa5681 | ||
|  | 9164cfbfc6 | ||
|  | 2f3a96e5de | ||
|  | 26aa89cb0c | ||
|  | 426d2365d1 | ||
|  | 81bbd4e4ce | ||
|  | b83bbe2dd1 | ||
|  | 3f2d8eb727 | ||
|  | 60f9bf69cd | ||
|  | 38d8cd228e | ||
|  | 4e1e242025 | ||
|  | 20c6f19bb2 | ||
|  | 036270a0c5 | ||
|  | 6c506601c5 | ||
|  | b5aeae526f | ||
|  | e4ff4c902a | ||
|  | 2e4d4625b6 | ||
|  | 10f2c2530b | ||
|  | 375e0698dc | ||
|  | 64b3b37476 | ||
|  | 59e1a9be4e | ||
|  | aac1736617 | ||
|  | 60f814733d | ||
|  | 612e9a178a | ||
|  | 21af9cf83c | ||
|  | 3b8cb929d6 | ||
|  | 5d3046eae8 | ||
|  | d73063682e | ||
|  | 59584b6605 | ||
|  | f6d7188615 | ||
|  | b810b6f6bd | ||
|  | e0a92dff32 | ||
|  | e125f0f738 | ||
|  | 5f081d87b0 | ||
|  | 84e246a963 | ||
|  | c18025c0b8 | ||
|  | a918955020 | ||
|  | e3c18ce872 | ||
|  | 07a61e8826 | ||
|  | 58cb7c0732 | ||
|  | 10339fd775 | ||
|  | 9400c207f7 | ||
|  | 1fa89a2e7d | ||
|  | 09b233b82e | ||
|  | 34ca4dd53a | ||
|  | c19cf46169 | ||
|  | c8db9ddb33 | ||
|  | 104986b53d | ||
|  | 91fd44419b | ||
|  | d1daa0e3f7 | ||
|  | 05e1aed326 | ||
|  | d3fd23117a | ||
|  | def51267e9 | ||
|  | 5b52f29b2f | ||
|  | 25bd03f201 | ||
|  | d5edd100a5 | ||
|  | e39e326b79 | ||
|  | 8458e13a23 | ||
|  | 3575278b57 | ||
|  | 69a2c8769a | ||
|  | d4eaea00cf | ||
|  | 347ccdc468 | ||
|  | bf034ce239 | ||
|  | 791d0ab0b5 | ||
|  | 94a2a645bd | ||
|  | 281b55df04 | ||
|  | a36e797bfc | ||
|  | 0bade717bf | ||
|  | 84fe791519 | ||
|  | e009a37f6e | ||
|  | d7a887baf1 | ||
|  | 060bb59535 | ||
|  | bac36399c1 | ||
|  | b5b930d5bb | ||
|  | 4fca66a7c6 | ||
|  | bd181b9481 | ||
|  | 919ced1c31 | ||
|  | b32fd473f8 | ||
|  | ffcab64890 | ||
|  | 374fb325f3 | ||
|  | bab88bc4f7 | ||
|  | 7533f66b54 | ||
|  | 805cde5899 | ||
|  | a0534e03f9 | ||
|  | ebba195e0d | ||
|  | 3b433fe6fb | ||
|  | 07d1030660 | ||
|  | f8d7d23893 | ||
|  | cdeb718229 | ||
|  | cb28568198 | ||
|  | 45440da79d | ||
|  | 6fe8533414 | ||
|  | f776a7fe4a | ||
|  | cff884929c | ||
|  | 9c991c7e29 | ||
|  | 1c70d8c4d9 | ||
|  | f0e9a5299f | ||
|  | f1b8ba45e7 | ||
|  | fe998ab578 | ||
|  | c2ee2b5fd1 | ||
|  | 3b734ee397 | ||
|  | 8637a9512a | ||
|  | 7f6e2ee03e | ||
|  | 7b02acb2bd | ||
|  | 86948c6ea0 | ||
|  | 53d226924a | ||
|  | 80176b1b39 | ||
|  | 29ddafd0fc | ||
|  | 0f08364e4f | ||
|  | a198d59381 | ||
|  | 3a4f5f2324 | ||
|  | 824d84473f | ||
|  | 38964a4076 | ||
|  | 0d9aa87228 | ||
|  | 0e959d9b94 | ||
|  | 752f70cd48 | ||
|  | e0e42873c1 | ||
|  | 21165ed489 | ||
|  | 09288d633c | ||
|  | fe00c96435 | ||
|  | 0765f30308 | ||
|  | a6326b664e | ||
|  | ccd30e1485 | ||
|  | 3060887a37 | ||
|  | b53059344e | ||
|  | aaf5ebf345 | ||
|  | 48edb8f72e | ||
|  | 0ff3bf6dc5 | ||
|  | 351eab02ae | ||
|  | 2bb374daea | ||
|  | 49ecbc81d4 | ||
|  | 9e5fb52eb9 | ||
|  | feee5ccde2 | ||
|  | e0f6a146d8 | ||
|  | daa095c519 | ||
|  | c2676853ca | ||
|  | 6a824033f8 | ||
|  | cee6a37639 | ||
|  | 6cc3ad110c | ||
|  | e6c6f82c52 | ||
|  | d10d0c4e7f | ||
|  | 9c106d625a | ||
|  | 6795bbca31 | ||
|  | d161c2dc35 | ||
|  | 7a06826cf1 | ||
|  | c3712b8e06 | ||
|  | 901ee77b84 | ||
|  | 1b84f59273 | ||
|  | 1fb41a4300 | ||
|  | 287bac946f | ||
|  | 80c14be65e | ||
|  | d7a2a4852d | ||
|  | d185f2eaa7 | ||
|  | 813d4cd900 | ||
|  | 75c6c6b173 | ||
|  | 220ad5e3ee | ||
|  | ba5dc670a5 | ||
|  | a0ca362690 | ||
|  | 249b6e61ec | 
| @@ -34,6 +34,9 @@ directory | |||||||
|  |  | ||||||
| #if defined __GNUC__ && __GNUC__>=6 | #if defined __GNUC__ && __GNUC__>=6 | ||||||
| #pragma GCC diagnostic ignored "-Wignored-attributes" | #pragma GCC diagnostic ignored "-Wignored-attributes" | ||||||
|  | #endif | ||||||
|  | #if defined __GNUC__ && __GNUC__>=6 | ||||||
|  | #pragma GCC diagnostic ignored "-Wpsabi" | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  //disables an Intel compiler-specific warning (in json.hpp) |  //disables an Intel compiler-specific warning (in json.hpp) | ||||||
|   | |||||||
| @@ -36,6 +36,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/qcd/QCD.h> | #include <Grid/qcd/QCD.h> | ||||||
| #include <Grid/qcd/spin/Spin.h> | #include <Grid/qcd/spin/Spin.h> | ||||||
|  | #include <Grid/qcd/gparity/Gparity.h> | ||||||
| #include <Grid/qcd/utils/Utils.h> | #include <Grid/qcd/utils/Utils.h> | ||||||
| #include <Grid/qcd/representations/Representations.h> | #include <Grid/qcd/representations/Representations.h> | ||||||
| NAMESPACE_CHECK(GridQCDCore); | NAMESPACE_CHECK(GridQCDCore); | ||||||
|   | |||||||
| @@ -54,6 +54,7 @@ NAMESPACE_CHECK(BiCGSTAB); | |||||||
| #include <Grid/algorithms/iterative/SchurRedBlack.h> | #include <Grid/algorithms/iterative/SchurRedBlack.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||||
|  | #include <Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h> | ||||||
| #include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h> | #include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h> | ||||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> | #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> | ||||||
|   | |||||||
| @@ -442,6 +442,8 @@ public: | |||||||
|     for(int p=0; p<geom.npoint; p++) |     for(int p=0; p<geom.npoint; p++) | ||||||
|       points[p] = geom.points_dagger[p]; |       points[p] = geom.points_dagger[p]; | ||||||
|  |  | ||||||
|  |     auto points_p = &points[0]; | ||||||
|  |  | ||||||
|     RealD* dag_factor_p = &dag_factor[0]; |     RealD* dag_factor_p = &dag_factor[0]; | ||||||
|  |  | ||||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { |     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||||
| @@ -453,7 +455,7 @@ public: | |||||||
|       StencilEntry *SE; |       StencilEntry *SE; | ||||||
|  |  | ||||||
|       for(int p=0;p<geom_v.npoint;p++){ |       for(int p=0;p<geom_v.npoint;p++){ | ||||||
|         int point = points[p]; |         int point = points_p[p]; | ||||||
|  |  | ||||||
| 	SE=Stencil_v.GetEntry(ptype,point,ss); | 	SE=Stencil_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
| @@ -708,6 +710,8 @@ public: | |||||||
|     for(int p=0; p<npoint; p++) |     for(int p=0; p<npoint; p++) | ||||||
|       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; |       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; | ||||||
|  |  | ||||||
|  |     auto points_p = &points[0]; | ||||||
|  |  | ||||||
|     Vector<Aview> AcceleratorViewContainer; |     Vector<Aview> AcceleratorViewContainer; | ||||||
|     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); |     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); | ||||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; |     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||||
| @@ -728,7 +732,7 @@ public: | |||||||
|         StencilEntry *SE; |         StencilEntry *SE; | ||||||
|  |  | ||||||
|         for(int p=0;p<npoint;p++){ |         for(int p=0;p<npoint;p++){ | ||||||
|           int point = points[p]; |           int point = points_p[p]; | ||||||
|           SE=st_v.GetEntry(ptype,point,ss); |           SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|           if(SE->_is_local) { |           if(SE->_is_local) { | ||||||
| @@ -754,7 +758,7 @@ public: | |||||||
|         StencilEntry *SE; |         StencilEntry *SE; | ||||||
|  |  | ||||||
|         for(int p=0;p<npoint;p++){ |         for(int p=0;p<npoint;p++){ | ||||||
|           int point = points[p]; |           int point = points_p[p]; | ||||||
|           SE=st_v.GetEntry(ptype,point,ss); |           SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|           if(SE->_is_local) { |           if(SE->_is_local) { | ||||||
|   | |||||||
| @@ -136,7 +136,7 @@ public: | |||||||
|     flops=0; |     flops=0; | ||||||
|     usec =0; |     usec =0; | ||||||
|     Coordinate layout(Nd,1); |     Coordinate layout(Nd,1); | ||||||
|     sgrid = new GridCartesian(dimensions,layout,processors); |     sgrid = new GridCartesian(dimensions,layout,processors,*grid); | ||||||
|   }; |   }; | ||||||
|      |      | ||||||
|   ~FFT ( void)  { |   ~FFT ( void)  { | ||||||
| @@ -182,7 +182,7 @@ public: | |||||||
|     pencil_gd[dim] = G*processors[dim]; |     pencil_gd[dim] = G*processors[dim]; | ||||||
|        |        | ||||||
|     // Pencil global vol LxLxGxLxL per node |     // Pencil global vol LxLxGxLxL per node | ||||||
|     GridCartesian pencil_g(pencil_gd,layout,processors); |     GridCartesian pencil_g(pencil_gd,layout,processors,*vgrid); | ||||||
|        |        | ||||||
|     // Construct pencils |     // Construct pencils | ||||||
|     typedef typename vobj::scalar_object sobj; |     typedef typename vobj::scalar_object sobj; | ||||||
|   | |||||||
| @@ -223,9 +223,14 @@ class SchurOperatorBase :  public LinearOperatorBase<Field> { | |||||||
|     Mpc(in,tmp); |     Mpc(in,tmp); | ||||||
|     MpcDag(tmp,out); |     MpcDag(tmp,out); | ||||||
|   } |   } | ||||||
|  |   virtual  void MpcMpcDag(const Field &in, Field &out) { | ||||||
|  |     Field tmp(in.Grid()); | ||||||
|  |     tmp.Checkerboard() = in.Checkerboard(); | ||||||
|  |     MpcDag(in,tmp); | ||||||
|  |     Mpc(tmp,out); | ||||||
|  |   } | ||||||
|   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ |   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||||
|     out.Checkerboard() = in.Checkerboard(); |     HermOp(in,out); | ||||||
|     MpcDagMpc(in,out); |  | ||||||
|     ComplexD dot= innerProduct(in,out);  |     ComplexD dot= innerProduct(in,out);  | ||||||
|     n1=real(dot); |     n1=real(dot); | ||||||
|     n2=norm2(out); |     n2=norm2(out); | ||||||
| @@ -276,6 +281,16 @@ template<class Matrix,class Field> | |||||||
|       axpy(out,-1.0,tmp,out); |       axpy(out,-1.0,tmp,out); | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  | // Mpc MpcDag system presented as the HermOp | ||||||
|  | template<class Matrix,class Field> | ||||||
|  | class SchurDiagMooeeDagOperator :  public SchurDiagMooeeOperator<Matrix,Field> { | ||||||
|  |  public: | ||||||
|  |   virtual void HermOp(const Field &in, Field &out){ | ||||||
|  |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |     this->MpcMpcDag(in,out); | ||||||
|  |   } | ||||||
|  |   SchurDiagMooeeDagOperator (Matrix &Mat): SchurDiagMooeeOperator<Matrix,Field>(Mat){}; | ||||||
|  | }; | ||||||
| template<class Matrix,class Field> | template<class Matrix,class Field> | ||||||
|   class SchurDiagOneOperator :  public SchurOperatorBase<Field> { |   class SchurDiagOneOperator :  public SchurOperatorBase<Field> { | ||||||
|  protected: |  protected: | ||||||
| @@ -530,6 +545,16 @@ public: | |||||||
| template<class Field> class LinearFunction { | template<class Field> class LinearFunction { | ||||||
| public: | public: | ||||||
|   virtual void operator() (const Field &in, Field &out) = 0; |   virtual void operator() (const Field &in, Field &out) = 0; | ||||||
|  |  | ||||||
|  |   virtual void operator() (const std::vector<Field> &in, std::vector<Field> &out) | ||||||
|  |   { | ||||||
|  |     assert(in.size() == out.size()); | ||||||
|  |  | ||||||
|  |     for (unsigned int i = 0; i < in.size(); ++i) | ||||||
|  |     { | ||||||
|  |       (*this)(in[i], out[i]); | ||||||
|  |     } | ||||||
|  |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | ||||||
|   | |||||||
| @@ -292,6 +292,7 @@ public: | |||||||
| template<class Field> | template<class Field> | ||||||
| class ChebyshevLanczos : public Chebyshev<Field> { | class ChebyshevLanczos : public Chebyshev<Field> { | ||||||
| private: | private: | ||||||
|  |  | ||||||
|   std::vector<RealD> Coeffs; |   std::vector<RealD> Coeffs; | ||||||
|   int order; |   int order; | ||||||
|   RealD alpha; |   RealD alpha; | ||||||
|   | |||||||
| @@ -102,7 +102,7 @@ public: | |||||||
|     // Check if guess is really REALLY good :) |     // Check if guess is really REALLY good :) | ||||||
|     if (cp <= rsq) { |     if (cp <= rsq) { | ||||||
|       TrueResidual = std::sqrt(a/ssq); |       TrueResidual = std::sqrt(a/ssq); | ||||||
|       std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl; |       std::cout << GridLogMessage << "ConjugateGradient guess is converged already "<<TrueResidual<< " tol "<< Tolerance<< std::endl; | ||||||
|       IterationsToComplete = 0;	 |       IterationsToComplete = 0;	 | ||||||
|       return; |       return; | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -48,19 +48,29 @@ NAMESPACE_BEGIN(Grid); | |||||||
|     Integer TotalInnerIterations; //Number of inner CG iterations |     Integer TotalInnerIterations; //Number of inner CG iterations | ||||||
|     Integer TotalOuterIterations; //Number of restarts |     Integer TotalOuterIterations; //Number of restarts | ||||||
|     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step |     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step | ||||||
|  |     RealD TrueResidual; | ||||||
|  |  | ||||||
|     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess |     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess | ||||||
|     LinearFunction<FieldF> *guesser; |     LinearFunction<FieldF> *guesser; | ||||||
|  |  | ||||||
|     MixedPrecisionConjugateGradient(RealD tol,  |     MixedPrecisionConjugateGradient(RealD Tol, | ||||||
|  | 				    Integer maxinnerit,  | ||||||
|  | 				    Integer maxouterit,  | ||||||
|  | 				    GridBase* _sp_grid,  | ||||||
|  | 				    LinearOperatorBase<FieldF> &_Linop_f,  | ||||||
|  | 				    LinearOperatorBase<FieldD> &_Linop_d) : | ||||||
|  |       MixedPrecisionConjugateGradient(Tol, Tol, maxinnerit, maxouterit, _sp_grid, _Linop_f, _Linop_d) {}; | ||||||
|  |  | ||||||
|  |     MixedPrecisionConjugateGradient(RealD Tol, | ||||||
|  | 				    RealD InnerTol, | ||||||
| 				    Integer maxinnerit,  | 				    Integer maxinnerit,  | ||||||
| 				    Integer maxouterit,  | 				    Integer maxouterit,  | ||||||
| 				    GridBase* _sp_grid,  | 				    GridBase* _sp_grid,  | ||||||
| 				    LinearOperatorBase<FieldF> &_Linop_f,  | 				    LinearOperatorBase<FieldF> &_Linop_f,  | ||||||
| 				    LinearOperatorBase<FieldD> &_Linop_d) : | 				    LinearOperatorBase<FieldD> &_Linop_d) : | ||||||
|       Linop_f(_Linop_f), Linop_d(_Linop_d), |       Linop_f(_Linop_f), Linop_d(_Linop_d), | ||||||
|       Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), |       Tolerance(Tol), InnerTolerance(InnerTol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), | ||||||
|       OuterLoopNormMult(100.), guesser(NULL){ }; |       OuterLoopNormMult(100.), guesser(NULL){ assert(InnerTol < 1.0e-1);}; | ||||||
|  |  | ||||||
|     void useGuesser(LinearFunction<FieldF> &g){ |     void useGuesser(LinearFunction<FieldF> &g){ | ||||||
|       guesser = &g; |       guesser = &g; | ||||||
| @@ -79,6 +89,11 @@ NAMESPACE_BEGIN(Grid); | |||||||
|     RealD stop = src_norm * Tolerance*Tolerance; |     RealD stop = src_norm * Tolerance*Tolerance; | ||||||
|  |  | ||||||
|     GridBase* DoublePrecGrid = src_d_in.Grid(); |     GridBase* DoublePrecGrid = src_d_in.Grid(); | ||||||
|  |  | ||||||
|  |     //Generate precision change workspaces | ||||||
|  |     precisionChangeWorkspace wk_dp_from_sp(DoublePrecGrid, SinglePrecGrid); | ||||||
|  |     precisionChangeWorkspace wk_sp_from_dp(SinglePrecGrid, DoublePrecGrid); | ||||||
|  |  | ||||||
|     FieldD tmp_d(DoublePrecGrid); |     FieldD tmp_d(DoublePrecGrid); | ||||||
|     tmp_d.Checkerboard() = cb; |     tmp_d.Checkerboard() = cb; | ||||||
|      |      | ||||||
| @@ -119,7 +134,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
|       while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ?? |       while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ?? | ||||||
|  |  | ||||||
|       PrecChangeTimer.Start(); |       PrecChangeTimer.Start(); | ||||||
|       precisionChange(src_f, src_d); |       precisionChange(src_f, src_d, wk_sp_from_dp); | ||||||
|       PrecChangeTimer.Stop(); |       PrecChangeTimer.Stop(); | ||||||
|        |        | ||||||
|       sol_f = Zero(); |       sol_f = Zero(); | ||||||
| @@ -137,7 +152,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
|        |        | ||||||
|       //Convert sol back to double and add to double prec solution |       //Convert sol back to double and add to double prec solution | ||||||
|       PrecChangeTimer.Start(); |       PrecChangeTimer.Start(); | ||||||
|       precisionChange(tmp_d, sol_f); |       precisionChange(tmp_d, sol_f, wk_dp_from_sp); | ||||||
|       PrecChangeTimer.Stop(); |       PrecChangeTimer.Stop(); | ||||||
|        |        | ||||||
|       axpy(sol_d, 1.0, tmp_d, sol_d); |       axpy(sol_d, 1.0, tmp_d, sol_d); | ||||||
| @@ -149,6 +164,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
|     ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations); |     ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations); | ||||||
|     CG_d(Linop_d, src_d_in, sol_d); |     CG_d(Linop_d, src_d_in, sol_d); | ||||||
|     TotalFinalStepIterations = CG_d.IterationsToComplete; |     TotalFinalStepIterations = CG_d.IterationsToComplete; | ||||||
|  |     TrueResidual = CG_d.TrueResidual; | ||||||
|  |  | ||||||
|     TotalTimer.Stop(); |     TotalTimer.Stop(); | ||||||
|     std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl; |     std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl; | ||||||
|   | |||||||
| @@ -52,7 +52,7 @@ public: | |||||||
|   MultiShiftFunction shifts; |   MultiShiftFunction shifts; | ||||||
|   std::vector<RealD> TrueResidualShift; |   std::vector<RealD> TrueResidualShift; | ||||||
|  |  | ||||||
|   ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :  |   ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) :  | ||||||
|     MaxIterations(maxit), |     MaxIterations(maxit), | ||||||
|     shifts(_shifts) |     shifts(_shifts) | ||||||
|   {  |   {  | ||||||
| @@ -183,6 +183,9 @@ public: | |||||||
|       axpby(psi[s],0.,-bs[s]*alpha[s],src,src); |       axpby(psi[s],0.,-bs[s]*alpha[s],src,src); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl; | ||||||
|  |      | ||||||
|  |    | ||||||
|   /////////////////////////////////////// |   /////////////////////////////////////// | ||||||
|   // Timers |   // Timers | ||||||
|   /////////////////////////////////////// |   /////////////////////////////////////// | ||||||
|   | |||||||
							
								
								
									
										411
									
								
								Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										411
									
								
								Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,411 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Christopher Kelly <ckelly@bnl.gov> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H | ||||||
|  | #define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | //CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.  | ||||||
|  | //The residual is stored in single precision, but the search directions and solution are stored in double precision.  | ||||||
|  | //Every ReliableUpdateFreq iterations the residual is corrected in double precision.  | ||||||
|  |      | ||||||
|  | //For safety, a final regular CG is applied to clean up if necessary | ||||||
|  |  | ||||||
|  | //Linop to add shift to input linop, used in cleanup CG | ||||||
|  | namespace ConjugateGradientMultiShiftMixedPrecSupport{ | ||||||
|  | template<typename Field> | ||||||
|  | class ShiftedLinop: public LinearOperatorBase<Field>{ | ||||||
|  | public: | ||||||
|  |   LinearOperatorBase<Field> &linop_base; | ||||||
|  |   RealD shift; | ||||||
|  |  | ||||||
|  |   ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){} | ||||||
|  |  | ||||||
|  |   void OpDiag (const Field &in, Field &out){ assert(0); } | ||||||
|  |   void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); } | ||||||
|  |   void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } | ||||||
|  |    | ||||||
|  |   void Op     (const Field &in, Field &out){ assert(0); } | ||||||
|  |   void AdjOp  (const Field &in, Field &out){ assert(0); } | ||||||
|  |  | ||||||
|  |   void HermOp(const Field &in, Field &out){ | ||||||
|  |     linop_base.HermOp(in, out); | ||||||
|  |     axpy(out, shift, in, out); | ||||||
|  |   }     | ||||||
|  |  | ||||||
|  |   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||||
|  |     HermOp(in,out); | ||||||
|  |     ComplexD dot = innerProduct(in,out); | ||||||
|  |     n1=real(dot); | ||||||
|  |     n2=norm2(out); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class FieldD, class FieldF, | ||||||
|  | 	 typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, | ||||||
|  | 	 typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>  | ||||||
|  | class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>, | ||||||
|  | 					     public OperatorFunction<FieldD> | ||||||
|  | { | ||||||
|  | public:                                                 | ||||||
|  |  | ||||||
|  |   using OperatorFunction<FieldD>::operator(); | ||||||
|  |  | ||||||
|  |   RealD   Tolerance; | ||||||
|  |   Integer MaxIterations; | ||||||
|  |   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||||
|  |   std::vector<int> IterationsToCompleteShift;  // Iterations for this shift | ||||||
|  |   int verbose; | ||||||
|  |   MultiShiftFunction shifts; | ||||||
|  |   std::vector<RealD> TrueResidualShift; | ||||||
|  |  | ||||||
|  |   int ReliableUpdateFreq; //number of iterations between reliable updates | ||||||
|  |  | ||||||
|  |   GridBase* SinglePrecGrid; //Grid for single-precision fields | ||||||
|  |   LinearOperatorBase<FieldF> &Linop_f; //single precision | ||||||
|  |  | ||||||
|  |   ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts, | ||||||
|  | 				       GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f, | ||||||
|  | 				       int _ReliableUpdateFreq | ||||||
|  | 				       ) :  | ||||||
|  |     MaxIterations(maxit),  shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq) | ||||||
|  |   {  | ||||||
|  |     verbose=1; | ||||||
|  |     IterationsToCompleteShift.resize(_shifts.order); | ||||||
|  |     TrueResidualShift.resize(_shifts.order); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi) | ||||||
|  |   { | ||||||
|  |     GridBase *grid = src.Grid(); | ||||||
|  |     int nshift = shifts.order; | ||||||
|  |     std::vector<FieldD> results(nshift,grid); | ||||||
|  |     (*this)(Linop,src,results,psi); | ||||||
|  |   } | ||||||
|  |   void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi) | ||||||
|  |   { | ||||||
|  |     int nshift = shifts.order; | ||||||
|  |  | ||||||
|  |     (*this)(Linop,src,results); | ||||||
|  |    | ||||||
|  |     psi = shifts.norm*src; | ||||||
|  |     for(int i=0;i<nshift;i++){ | ||||||
|  |       psi = psi + shifts.residues[i]*results[i]; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Mixed precision multi-shift conjugate gradient. | ||||||
|  |   // | ||||||
|  |   // For every shift s the final check below measures the residual of | ||||||
|  |   //       ( HermOp + mass[s] ) psi_d[s] = alpha[s] * src_d          (alpha[s] is 1.0 here) | ||||||
|  |   // where HermOp is the Hermitian operator supplied by Linop_d (double) / Linop_f (single). | ||||||
|  |   // | ||||||
|  |   //  Linop_d : double precision operator; used for reliable updates and the final check | ||||||
|  |   //  src_d   : double precision source | ||||||
|  |   //  psi_d   : one solution per shift; must already have shifts.order entries | ||||||
|  |   // | ||||||
|  |   // The matrix multiply and the residual live in single precision; the search directions and | ||||||
|  |   // solutions are accumulated in double precision.  Every ReliableUpdateFreq iterations the | ||||||
|  |   // residual is replaced by the double precision residual of shift 0. | ||||||
|  |   // | ||||||
|  |   // On convergence this fills IterationsToCompleteShift[], TrueResidualShift[] and | ||||||
|  |   // IterationsToComplete.  If MaxIterations is exhausted it only logs a message and returns; | ||||||
|  |   // TrueResidualShift[] and IterationsToComplete are not written on that path. | ||||||
|  |   ///////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d) | ||||||
|  |   {  | ||||||
|  |     GridBase *DoublePrecGrid = src_d.Grid(); | ||||||
|  |     precisionChangeWorkspace wk_f_from_d(SinglePrecGrid, DoublePrecGrid); | ||||||
|  |     precisionChangeWorkspace wk_d_from_f(DoublePrecGrid, SinglePrecGrid); | ||||||
|  |  | ||||||
|  |     //////////////////////////////////////////////////////////////////////// | ||||||
|  |     // Convenience references to the info stored in "MultiShiftFunction" | ||||||
|  |     //////////////////////////////////////////////////////////////////////// | ||||||
|  |     int nshift = shifts.order; | ||||||
|  |     assert(nshift > 0); // mass[0], z[0] and bs[0] are used unconditionally below | ||||||
|  |  | ||||||
|  |     std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts" | ||||||
|  |     std::vector<RealD> &mresidual(shifts.tolerances); | ||||||
|  |     std::vector<RealD> alpha(nshift,1.0); | ||||||
|  |  | ||||||
|  |     //Double precision search directions | ||||||
|  |     FieldD p_d(DoublePrecGrid); | ||||||
|  |     std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision) | ||||||
|  |  | ||||||
|  |     FieldD tmp_d(DoublePrecGrid); | ||||||
|  |     FieldD r_d(DoublePrecGrid); | ||||||
|  |     FieldD mmp_d(DoublePrecGrid); | ||||||
|  |  | ||||||
|  |     assert(psi_d.size()==nshift); | ||||||
|  |     assert(mass.size()==nshift); | ||||||
|  |     assert(mresidual.size()==nshift); | ||||||
|  |    | ||||||
|  |     // Per-shift recurrence state.  std::vector rather than runtime-sized stack arrays, | ||||||
|  |     // which are a compiler extension and not ISO C++. | ||||||
|  |     std::vector<RealD> bs(nshift);                                   // per-shift beta | ||||||
|  |     std::vector<RealD> rsq(nshift);                                  // per-shift target |r|^2 | ||||||
|  |     std::vector<std::vector<RealD> > z(nshift, std::vector<RealD>(2)); // two-deep history, toggled by iz | ||||||
|  |     std::vector<int>   converged(nshift); | ||||||
|  |    | ||||||
|  |     const int       primary =0; | ||||||
|  |    | ||||||
|  |     //Primary shift fields CG iteration | ||||||
|  |     RealD a,b,c,d; | ||||||
|  |     RealD cp,bp,qq; //prev | ||||||
|  |    | ||||||
|  |     // Matrix mult fields | ||||||
|  |     FieldF r_f(SinglePrecGrid); | ||||||
|  |     FieldF p_f(SinglePrecGrid); | ||||||
|  |     FieldF mmp_f(SinglePrecGrid); | ||||||
|  |     FieldF src_f(SinglePrecGrid); | ||||||
|  |     precisionChange(src_f, src_d, wk_f_from_d); | ||||||
|  |  | ||||||
|  |     // Check lightest mass | ||||||
|  |     for(int s=0;s<nshift;s++){ | ||||||
|  |       assert( mass[s]>= mass[primary] ); | ||||||
|  |       converged[s]=0; | ||||||
|  |     } | ||||||
|  |    | ||||||
|  |     // Wire guess to zero | ||||||
|  |     // Residuals "r" are src | ||||||
|  |     // First search direction "p" is also src | ||||||
|  |     cp = norm2(src_d); | ||||||
|  |  | ||||||
|  |     // Handle trivial case of zero src. | ||||||
|  |     if( cp == 0. ){ | ||||||
|  |       for(int s=0;s<nshift;s++){ | ||||||
|  |         psi_d[s] = Zero(); | ||||||
|  |         IterationsToCompleteShift[s] = 1; | ||||||
|  |         TrueResidualShift[s] = 0.; | ||||||
|  |       } | ||||||
|  |       return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Target |r|^2 per shift is relative to |src|^2 | ||||||
|  |     for(int s=0;s<nshift;s++){ | ||||||
|  |       rsq[s] = cp * mresidual[s] * mresidual[s]; | ||||||
|  |       std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl; | ||||||
|  |       ps_d[s] = src_d; | ||||||
|  |     } | ||||||
|  |     // r and p for primary | ||||||
|  |     r_f=src_f; //residual maintained in single | ||||||
|  |     p_f=src_f; | ||||||
|  |     p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys | ||||||
|  |    | ||||||
|  |     //MdagM+m[0] | ||||||
|  |     Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp) | ||||||
|  |     axpy(mmp_f,mass[0],p_f,mmp_f); | ||||||
|  |     RealD rn = norm2(p_f); | ||||||
|  |     d += rn*mass[0]; | ||||||
|  |  | ||||||
|  |     b = -cp /d; | ||||||
|  |    | ||||||
|  |     // Set up the various shift variables | ||||||
|  |     int       iz=0; | ||||||
|  |     z[0][1-iz] = 1.0; | ||||||
|  |     z[0][iz]   = 1.0; | ||||||
|  |     bs[0]      = b; | ||||||
|  |     for(int s=1;s<nshift;s++){ | ||||||
|  |       z[s][1-iz] = 1.0; | ||||||
|  |       z[s][iz]   = 1.0/( 1.0 - b*(mass[s]-mass[0])); | ||||||
|  |       bs[s]      = b*z[s][iz];  | ||||||
|  |     } | ||||||
|  |    | ||||||
|  |     // r += b[0] A.p[0] | ||||||
|  |     // c= norm(r) | ||||||
|  |     c=axpy_norm(r_f,b,mmp_f,r_f); | ||||||
|  |    | ||||||
|  |     // First solution update for every shift | ||||||
|  |     for(int s=0;s<nshift;s++) { | ||||||
|  |       axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d); | ||||||
|  |     } | ||||||
|  |    | ||||||
|  |     /////////////////////////////////////// | ||||||
|  |     // Timers | ||||||
|  |     /////////////////////////////////////// | ||||||
|  |     GridStopWatch AXPYTimer, ShiftTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer; | ||||||
|  |  | ||||||
|  |     SolverTimer.Start(); | ||||||
|  |    | ||||||
|  |     // Iteration loop | ||||||
|  |     int k; | ||||||
|  |    | ||||||
|  |     for (k=1;k<=MaxIterations;k++){     | ||||||
|  |       a = c /cp; | ||||||
|  |  | ||||||
|  |       //Update double precision search direction by residual | ||||||
|  |       PrecChangeTimer.Start(); | ||||||
|  |       precisionChange(r_d, r_f, wk_d_from_f); | ||||||
|  |       PrecChangeTimer.Stop(); | ||||||
|  |  | ||||||
|  |       AXPYTimer.Start(); | ||||||
|  |       // p_d is updated on every iteration, whereas ps_d[0] stops once shift 0 is flagged converged | ||||||
|  |       axpy(p_d,a,p_d,r_d);  | ||||||
|  |  | ||||||
|  |       for(int s=0;s<nshift;s++){ | ||||||
|  |         if ( ! converged[s] ) {  | ||||||
|  |           if (s==0){ | ||||||
|  |             axpy(ps_d[s],a,ps_d[s],r_d); | ||||||
|  |           } else{ | ||||||
|  |             RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b); | ||||||
|  |             axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]); | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |       AXPYTimer.Stop(); | ||||||
|  |  | ||||||
|  |       PrecChangeTimer.Start(); | ||||||
|  |       precisionChange(p_f, p_d, wk_f_from_d); //get back single prec search direction for linop | ||||||
|  |       PrecChangeTimer.Stop(); | ||||||
|  |  | ||||||
|  |       cp=c; | ||||||
|  |       MatrixTimer.Start();   | ||||||
|  |       Linop_f.HermOp(p_f,mmp_f);  | ||||||
|  |       d=real(innerProduct(p_f,mmp_f));     | ||||||
|  |       MatrixTimer.Stop();   | ||||||
|  |  | ||||||
|  |       AXPYTimer.Start(); | ||||||
|  |       axpy(mmp_f,mass[0],p_f,mmp_f); | ||||||
|  |       AXPYTimer.Stop(); | ||||||
|  |       RealD rn = norm2(p_f); | ||||||
|  |       d += rn*mass[0]; | ||||||
|  |      | ||||||
|  |       bp=b; | ||||||
|  |       b=-cp/d; | ||||||
|  |      | ||||||
|  |       // Toggle the recurrence history | ||||||
|  |       bs[0] = b; | ||||||
|  |       iz = 1-iz; | ||||||
|  |       ShiftTimer.Start(); | ||||||
|  |       for(int s=1;s<nshift;s++){ | ||||||
|  |         if((!converged[s])){ | ||||||
|  |           RealD z0 = z[s][1-iz]; | ||||||
|  |           RealD z1 = z[s][iz]; | ||||||
|  |           z[s][iz] = z0*z1*bp | ||||||
|  |             / (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));  | ||||||
|  |           bs[s] = b*z[s][iz]/z0; // NB sign  rel to Mike | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |       ShiftTimer.Stop(); | ||||||
|  |  | ||||||
|  |       //Update double precision solutions | ||||||
|  |       AXPYTimer.Start(); | ||||||
|  |       for(int s=0;s<nshift;s++){ | ||||||
|  |         if( (!converged[s]) ) {  | ||||||
|  |           axpy(psi_d[s],-bs[s]*alpha[s],ps_d[s],psi_d[s]); | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       //Perform reliable update if necessary; otherwise update residual from single-prec mmp | ||||||
|  |       RealD c_f = axpy_norm(r_f,b,mmp_f,r_f); | ||||||
|  |       AXPYTimer.Stop(); | ||||||
|  |  | ||||||
|  |       c = c_f; | ||||||
|  |  | ||||||
|  |       if(k % ReliableUpdateFreq == 0){ | ||||||
|  |         //Replace r with true residual | ||||||
|  |         MatrixTimer.Start();   | ||||||
|  |         Linop_d.HermOp(psi_d[0],mmp_d);  | ||||||
|  |         MatrixTimer.Stop();   | ||||||
|  |  | ||||||
|  |         AXPYTimer.Start(); | ||||||
|  |         axpy(mmp_d,mass[0],psi_d[0],mmp_d); | ||||||
|  |  | ||||||
|  |         RealD c_d = axpy_norm(r_d, -1.0, mmp_d, src_d); | ||||||
|  |         AXPYTimer.Stop(); | ||||||
|  |  | ||||||
|  |         std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_f <<" with |r|^2 = "<<c_d<<std::endl; | ||||||
|  |          | ||||||
|  |         PrecChangeTimer.Start(); | ||||||
|  |         precisionChange(r_f, r_d, wk_f_from_d); | ||||||
|  |         PrecChangeTimer.Stop(); | ||||||
|  |         c = c_d; | ||||||
|  |       } | ||||||
|  |      | ||||||
|  |       // Convergence checks: the residual of shift s is estimated as c * z[s]^2 | ||||||
|  |       int all_converged = 1; | ||||||
|  |       for(int s=0;s<nshift;s++){ | ||||||
|  |        | ||||||
|  |         if ( (!converged[s]) ){ | ||||||
|  |           IterationsToCompleteShift[s] = k; | ||||||
|  |          | ||||||
|  |           RealD css  = c * z[s][iz]* z[s][iz]; | ||||||
|  |          | ||||||
|  |           if(css<rsq[s]){ | ||||||
|  |             std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl; | ||||||
|  |             converged[s]=1; | ||||||
|  |           } else { | ||||||
|  |             all_converged=0; | ||||||
|  |           } | ||||||
|  |  | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       if ( all_converged ){ | ||||||
|  |  | ||||||
|  |         SolverTimer.Stop(); | ||||||
|  |         std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl; | ||||||
|  |         std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl; | ||||||
|  |        | ||||||
|  |         // |src|^2 is the same for every shift: compute it once rather than once per shift | ||||||
|  |         RealD cn = norm2(src_d); | ||||||
|  |  | ||||||
|  |         // Check answers  | ||||||
|  |         for(int s=0; s < nshift; s++) {  | ||||||
|  |           Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq); | ||||||
|  |           axpy(tmp_d,mass[s],psi_d[s],mmp_d); | ||||||
|  |           axpy(r_d,-alpha[s],src_d,tmp_d); | ||||||
|  |           RealD rn = norm2(r_d); | ||||||
|  |           TrueResidualShift[s] = std::sqrt(rn/cn); | ||||||
|  |           std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl; | ||||||
|  |  | ||||||
|  |           //If we have not reached the desired tolerance, do a (mixed precision) CG cleanup | ||||||
|  |           if(rn >= rsq[s]){ | ||||||
|  |             CleanupTimer.Start(); | ||||||
|  |             std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl; | ||||||
|  |  | ||||||
|  |             //Setup linear operators for final cleanup | ||||||
|  |             ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]); | ||||||
|  |             ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]); | ||||||
|  |                                                 | ||||||
|  |             MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);  | ||||||
|  |             cg(src_d, psi_d[s]); | ||||||
|  |              | ||||||
|  |             TrueResidualShift[s] = cg.TrueResidual; | ||||||
|  |             CleanupTimer.Stop(); | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl; | ||||||
|  |         std::cout << GridLogMessage << "\tSolver    " << SolverTimer.Elapsed()     <<std::endl; | ||||||
|  |         std::cout << GridLogMessage << "\t\tAXPY    " << AXPYTimer.Elapsed()     <<std::endl; | ||||||
|  |         std::cout << GridLogMessage << "\t\tMatrix    " << MatrixTimer.Elapsed()     <<std::endl; | ||||||
|  |         std::cout << GridLogMessage << "\t\tShift    " << ShiftTimer.Elapsed()     <<std::endl; | ||||||
|  |         std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed()     <<std::endl; | ||||||
|  |         std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed()     <<std::endl; | ||||||
|  |         std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl; | ||||||
|  |  | ||||||
|  |         IterationsToComplete = k;         | ||||||
|  |  | ||||||
|  |         return; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |     | ||||||
|  |     } | ||||||
|  |     // MaxIterations exhausted: this is reported but deliberately not treated as fatal. | ||||||
|  |     // NOTE(review): psi_d holds the unconverged iterate and IterationsToComplete is left untouched here. | ||||||
|  |     std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl; | ||||||
|  |     //  assert(0); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  | NAMESPACE_END(Grid); | ||||||
|  | #endif | ||||||
| @@ -54,15 +54,23 @@ class DeflatedGuesser: public LinearFunction<Field> { | |||||||
| private: | private: | ||||||
|   const std::vector<Field> &evec; |   const std::vector<Field> &evec; | ||||||
|   const std::vector<RealD> &eval; |   const std::vector<RealD> &eval; | ||||||
|  |   const unsigned int       N; | ||||||
|  |  | ||||||
| public: | public: | ||||||
|  |  | ||||||
|   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; |   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) | ||||||
|  |   : DeflatedGuesser(_evec, _eval, _evec.size()) | ||||||
|  |   {} | ||||||
|  |  | ||||||
|  |   DeflatedGuesser(const std::vector<Field> & _evec, const std::vector<RealD> & _eval, const unsigned int _N) | ||||||
|  |   : evec(_evec), eval(_eval), N(_N) | ||||||
|  |   { | ||||||
|  |     assert(evec.size()==eval.size()); | ||||||
|  |     assert(N <= evec.size()); | ||||||
|  |   }  | ||||||
|  |  | ||||||
|   virtual void operator()(const Field &src,Field &guess) { |   virtual void operator()(const Field &src,Field &guess) { | ||||||
|     guess = Zero(); |     guess = Zero(); | ||||||
|     assert(evec.size()==eval.size()); |  | ||||||
|     auto N = evec.size(); |  | ||||||
|     for (int i=0;i<N;i++) { |     for (int i=0;i<N;i++) { | ||||||
|       const Field& tmp = evec[i]; |       const Field& tmp = evec[i]; | ||||||
|       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); |       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); | ||||||
|   | |||||||
| @@ -40,7 +40,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    *        (-MoeMee^{-1}   1 )    |    *        (-MoeMee^{-1}   1 )    | ||||||
|    * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} ) |    * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} ) | ||||||
|    *           ( 0       1                    ) |    *           ( 0       1                    ) | ||||||
|    * L^{-d}  = ( 1      -Mee^{-dag} Moe^{dag} ) |    * L^{-dag}= ( 1      -Mee^{-dag} Moe^{dag} ) | ||||||
|    *           ( 0       1                    ) |    *           ( 0       1                    ) | ||||||
|    * |    * | ||||||
|    * U^-1 = (1   -Mee^{-1} Meo) |    * U^-1 = (1   -Mee^{-1} Meo) | ||||||
| @@ -82,7 +82,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o |    * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o | ||||||
|    *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) |    *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) | ||||||
|    *                              psi_o = M_oo^-1 phi_o |    *                              psi_o = M_oo^-1 phi_o | ||||||
|    * TODO: Deflation  |    * | ||||||
|  |    * | ||||||
|    */ |    */ | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| @@ -97,6 +98,7 @@ namespace Grid { | |||||||
|   protected: |   protected: | ||||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; |     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||||
|     OperatorFunction<Field> & _HermitianRBSolver; |     OperatorFunction<Field> & _HermitianRBSolver; | ||||||
|  |      | ||||||
|     int CBfactorise; |     int CBfactorise; | ||||||
|     bool subGuess; |     bool subGuess; | ||||||
|     bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver |     bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver | ||||||
| @@ -132,6 +134,31 @@ namespace Grid { | |||||||
|       (*this)(_Matrix,in,out,guess); |       (*this)(_Matrix,in,out,guess); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     // Block (multiple right hand side) form of RedBlackSource: | ||||||
|  |     // applies the single-field RedBlackSource to each in[b], writing src_o[b]. | ||||||
|  |     // src_o must already hold at least in.size() fields. | ||||||
|  |     // The src_e output of the single-field call goes into a scratch field and is discarded. | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     void RedBlackSource(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &src_o)  | ||||||
|  |     { | ||||||
|  |       assert(src_o.size() >= in.size()); // src_o[b] is indexed for every b < in.size() | ||||||
|  |       GridBase *grid = _Matrix.RedBlackGrid(); | ||||||
|  |       Field tmp(grid); | ||||||
|  |       int nblock = in.size(); | ||||||
|  |       for(int b=0;b<nblock;b++){ | ||||||
|  |         RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     // James can write his own deflated guesser | ||||||
|  |     // with optimised code for the inner products | ||||||
|  |     //    RedBlackSolveSplitGrid(); | ||||||
|  |     //    RedBlackSolve(_Matrix,src_o,sol_o);  | ||||||
|  |  | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     // Block (multiple right hand side) form of RedBlackSolution: | ||||||
|  |     // for each b, picks the even checkerboard of in[b] and passes it, together with | ||||||
|  |     // sol_o[b], to the single-field RedBlackSolution, which writes out[b]. | ||||||
|  |     // sol_o and out must already hold at least in.size() fields. | ||||||
|  |     ///////////////////////////////////////////////////////////// | ||||||
|  |     void RedBlackSolution(Matrix &_Matrix, const std::vector<Field> &in, const std::vector<Field> &sol_o, std::vector<Field> &out) | ||||||
|  |     { | ||||||
|  |       assert(sol_o.size() >= in.size()); // sol_o[b] and out[b] are indexed for every b < in.size() | ||||||
|  |       assert(out.size()   >= in.size()); | ||||||
|  |       GridBase *grid = _Matrix.RedBlackGrid(); | ||||||
|  |       Field tmp(grid); | ||||||
|  |       int nblock = in.size(); | ||||||
|  |       for(int b=0;b<nblock;b++) { | ||||||
|  |         pickCheckerboard(Even,tmp,in[b]); | ||||||
|  |         RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     template<class Guesser> |     template<class Guesser> | ||||||
|     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)  |     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)  | ||||||
|     { |     { | ||||||
| @@ -150,22 +177,27 @@ namespace Grid { | |||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Prepare RedBlack source |       // Prepare RedBlack source | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       for(int b=0;b<nblock;b++){ |       RedBlackSource(_Matrix,in,src_o); | ||||||
| 	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | 	//      for(int b=0;b<nblock;b++){ | ||||||
|       } | 	//	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||||
|  | 	//      } | ||||||
|  |        | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Make the guesses |       // Make the guesses | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       if ( subGuess ) guess_save.resize(nblock,grid); |       if ( subGuess ) guess_save.resize(nblock,grid); | ||||||
|  |  | ||||||
|       for(int b=0;b<nblock;b++){ |        | ||||||
|       if(useSolnAsInitGuess) { |       if(useSolnAsInitGuess) { | ||||||
|  |         for(int b=0;b<nblock;b++){ | ||||||
|           pickCheckerboard(Odd, sol_o[b], out[b]); |           pickCheckerboard(Odd, sol_o[b], out[b]); | ||||||
|  |         } | ||||||
|       } else { |       } else { | ||||||
|           guess(src_o[b],sol_o[b]);  |         guess(src_o, sol_o);  | ||||||
|       } |       } | ||||||
|  |  | ||||||
| 	    if ( subGuess ) {  | 	    if ( subGuess ) {  | ||||||
|  |         for(int b=0;b<nblock;b++){ | ||||||
|           guess_save[b] = sol_o[b]; |           guess_save[b] = sol_o[b]; | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
| @@ -190,11 +222,18 @@ namespace Grid { | |||||||
| 	// Check unprec residual if possible | 	// Check unprec residual if possible | ||||||
| 	///////////////////////////////////////////////// | 	///////////////////////////////////////////////// | ||||||
| 	if ( ! subGuess ) {	   | 	if ( ! subGuess ) {	   | ||||||
| 	  _Matrix.M(out[b],resid);  |  | ||||||
|  | 	  if ( this->adjoint() ) _Matrix.Mdag(out[b],resid);  | ||||||
|  | 	  else                   _Matrix.M(out[b],resid);  | ||||||
|  |  | ||||||
| 	  resid = resid-in[b]; | 	  resid = resid-in[b]; | ||||||
| 	  RealD ns = norm2(in[b]); | 	  RealD ns = norm2(in[b]); | ||||||
| 	  RealD nr = norm2(resid); | 	  RealD nr = norm2(resid); | ||||||
| 	 | 	 | ||||||
|  | 	  std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint "<< this->adjoint() << std::endl; | ||||||
|  | 	  if ( this->adjoint() )  | ||||||
|  | 	    std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | ||||||
|  | 	  else                    | ||||||
| 	    std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | 	    std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | ||||||
| 	} else { | 	} else { | ||||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl; | 	  std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl; | ||||||
| @@ -249,12 +288,21 @@ namespace Grid { | |||||||
|  |  | ||||||
|       // Verify the unprec residual |       // Verify the unprec residual | ||||||
|       if ( ! subGuess ) { |       if ( ! subGuess ) { | ||||||
|         _Matrix.M(out,resid);  |  | ||||||
|  | 	std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint "<< this->adjoint() << std::endl; | ||||||
|  | 	 | ||||||
|  | 	if ( this->adjoint() ) _Matrix.Mdag(out,resid);  | ||||||
|  | 	else                   _Matrix.M(out,resid);  | ||||||
|  |  | ||||||
|         resid = resid-in; |         resid = resid-in; | ||||||
|         RealD ns = norm2(in); |         RealD ns = norm2(in); | ||||||
|         RealD nr = norm2(resid); |         RealD nr = norm2(resid); | ||||||
|  |  | ||||||
|         std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl; | 	  if ( this->adjoint() )  | ||||||
|  | 	    std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint solver true unprec resid "<<std::sqrt(nr/ns) << std::endl; | ||||||
|  | 	  else                    | ||||||
|  | 	    std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid "<<std::sqrt(nr/ns) << std::endl; | ||||||
|  |  | ||||||
|       } else { |       } else { | ||||||
|         std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl; |         std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl; | ||||||
|       } |       } | ||||||
| @@ -263,6 +311,7 @@ namespace Grid { | |||||||
|     ///////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////// | ||||||
|     // Override in derived.  |     // Override in derived.  | ||||||
|     ///////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////// | ||||||
|  |     virtual bool adjoint(void) { return false; } | ||||||
|     virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0; |     virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0; | ||||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0; |     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0; | ||||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0; |     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0; | ||||||
| @@ -616,6 +665,127 @@ namespace Grid { | |||||||
|         this->_HermitianRBSolver(_OpEO, src_o, sol_o);  |         this->_HermitianRBSolver(_OpEO, src_o, sol_o);  | ||||||
|       } |       } | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  |   /* | ||||||
|  |    * Red black Schur decomposition | ||||||
|  |    * | ||||||
|  |    *  M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo) | ||||||
|  |    *      (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         ) | ||||||
|  |    *                =         L                     D                     U | ||||||
|  |    * | ||||||
|  |    * L^-1 = (1              0 ) | ||||||
|  |    *        (-MoeMee^{-1}   1 )    | ||||||
|  |    * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} ) | ||||||
|  |    *           ( 0       1                    ) | ||||||
|  |    * | ||||||
|  |    * U^-1 = (1   -Mee^{-1} Meo) | ||||||
|  |    *        (0    1           ) | ||||||
|  |    * U^{dag} = ( 1                 0) | ||||||
|  |    *           (Meo^dag Mee^{-dag} 1) | ||||||
|  |    * U^{-dag} = (  1                 0) | ||||||
|  |    *            (-Meo^dag Mee^{-dag} 1) | ||||||
|  |    * | ||||||
|  |    * | ||||||
|  |    *********************** | ||||||
|  |    *     M^dag psi = eta | ||||||
|  |    *********************** | ||||||
|  |    * | ||||||
|  |    * Really for Mobius: (Wilson - easier to just use gamma 5 hermiticity) | ||||||
|  |    * | ||||||
|  |    *    Mdag psi     =         Udag  Ddag  Ldag psi = eta | ||||||
|  |    * | ||||||
|  |    * U^{-dag} = (  1                 0) | ||||||
|  |    *            (-Meo^dag Mee^{-dag} 1) | ||||||
|  |    * | ||||||
|  |    * | ||||||
|  |    * i)                D^dag phi =  (U^{-dag}  eta) | ||||||
|  |    *                        eta'_e = eta_e | ||||||
|  |    *                        eta'_o = (eta_o - Meo^dag Mee^{-dag} eta_e) | ||||||
|  |    *  | ||||||
|  |    *      phi_o = D_oo^-dag eta'_o = D_oo^-dag (eta_o - Meo^dag Mee^{-dag} eta_e) | ||||||
|  |    * | ||||||
|  |    *      phi_e = D_ee^-dag eta'_e = D_ee^-dag eta_e | ||||||
|  |    *  | ||||||
|  |    * Solve:  | ||||||
|  |    * | ||||||
|  |    *      D_oo D_oo^dag phi_o = D_oo (eta_o - Meo^dag Mee^{-dag} eta_e) | ||||||
|  |    * | ||||||
|  |    * ii)  | ||||||
|  |    *      phi = L^dag psi => psi = L^-dag phi.  | ||||||
|  |    * | ||||||
|  |    * L^{-dag} = ( 1      -Mee^{-dag} Moe^{dag} ) | ||||||
|  |    *            ( 0       1                    ) | ||||||
|  |    * | ||||||
|  |    *   => sol_e = M_ee^-dag * ( src_e - Moe^dag phi_o )... | ||||||
|  |    *   => sol_o = phi_o | ||||||
|  |    */ | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Site diagonal has Mooee on it, but solve the Adjoint system | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Red-black Schur solver for the adjoint system  M^dag psi = eta. | ||||||
|  |   // adjoint() returns true; the base class uses this to check the unpreconditioned | ||||||
|  |   // residual with Mdag rather than M. | ||||||
|  |   template<class Field> class SchurRedBlackDiagMooeeDagSolve : public SchurRedBlackBase<Field> { | ||||||
|  |   public: | ||||||
|  |     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||||
|  |  | ||||||
|  |     virtual bool adjoint(void) { return true; } | ||||||
|  |  | ||||||
|  |     // HermitianRBSolver : solver applied to the Schur operator on the odd checkerboard | ||||||
|  |     // initSubGuess, _solnAsInitGuess : forwarded unchanged to SchurRedBlackBase | ||||||
|  |     SchurRedBlackDiagMooeeDagSolve(OperatorFunction<Field> &HermitianRBSolver, | ||||||
|  |                                    const bool initSubGuess = false, | ||||||
|  |                                    const bool _solnAsInitGuess = false)   | ||||||
|  |       : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess) {} | ||||||
|  |  | ||||||
|  |     ////////////////////////////////////////////////////// | ||||||
|  |     // Override RedBlack specialisation | ||||||
|  |     ////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |     // Splits src into checkerboards and builds the odd-site source for the Schur solve. | ||||||
|  |     // On return src_e holds the even part of src and src_o the preconditioned odd source. | ||||||
|  |     virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) | ||||||
|  |     { | ||||||
|  |       GridBase *grid = _Matrix.RedBlackGrid(); | ||||||
|  |  | ||||||
|  |       Field   tmp(grid); | ||||||
|  |       Field  Mtmp(grid); | ||||||
|  |  | ||||||
|  |       pickCheckerboard(Even,src_e,src); | ||||||
|  |       pickCheckerboard(Odd ,src_o,src); | ||||||
|  |       ///////////////////////////////////////////////////// | ||||||
|  |       // src_o = (source_o - Meo^dag MeeInvDag source_e) | ||||||
|  |       ///////////////////////////////////////////////////// | ||||||
|  |       _Matrix.MooeeInvDag(src_e,tmp);  assert(  tmp.Checkerboard() ==Even); | ||||||
|  |       _Matrix.MeooeDag   (tmp,Mtmp);   assert( Mtmp.Checkerboard() ==Odd);      | ||||||
|  |       tmp=src_o-Mtmp;                  assert(  tmp.Checkerboard() ==Odd);      | ||||||
|  |  | ||||||
|  |       // get the right Mpc: the right hand side is multiplied by the non-daggered Schur operator | ||||||
|  |       SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||||
|  |       _HermOpEO.Mpc(tmp,src_o);     assert(src_o.Checkerboard() ==Odd); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Odd-checkerboard solve: hands the daggered Schur operator to the Hermitian solver. | ||||||
|  |     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o) | ||||||
|  |     { | ||||||
|  |       SchurDiagMooeeDagOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||||
|  |       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Same as above for a block of right hand sides. | ||||||
|  |     virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o) | ||||||
|  |     { | ||||||
|  |       SchurDiagMooeeDagOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||||
|  |       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Reconstructs the full solution sol from the odd solution sol_o and the even source src_e. | ||||||
|  |     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) | ||||||
|  |     { | ||||||
|  |       GridBase *grid = _Matrix.RedBlackGrid(); | ||||||
|  |  | ||||||
|  |       Field  sol_e(grid); | ||||||
|  |       Field  tmp(grid); | ||||||
|  |       /////////////////////////////////////////////////// | ||||||
|  |       // sol_e = M_ee^-dag * ( src_e - Moe^dag phi_o )... | ||||||
|  |       // sol_o = phi_o | ||||||
|  |       /////////////////////////////////////////////////// | ||||||
|  |       _Matrix.MeooeDag(sol_o,tmp);      assert(tmp.Checkerboard()==Even); | ||||||
|  |       tmp = src_e-tmp;                  assert(tmp.Checkerboard()==Even); | ||||||
|  |       _Matrix.MooeeInvDag(tmp,sol_e);   assert(sol_e.Checkerboard()==Even); | ||||||
|  |        | ||||||
|  |       setCheckerboard(sol,sol_e); assert(  sol_e.Checkerboard() ==Even); | ||||||
|  |       setCheckerboard(sol,sol_o); assert(  sol_o.Checkerboard() ==Odd ); | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -9,14 +9,30 @@ NAMESPACE_BEGIN(Grid); | |||||||
| #define AccSmall (3) | #define AccSmall (3) | ||||||
| #define Shared   (4) | #define Shared   (4) | ||||||
| #define SharedSmall (5) | #define SharedSmall (5) | ||||||
|  | #undef GRID_MM_VERBOSE  | ||||||
| uint64_t total_shared; | uint64_t total_shared; | ||||||
| uint64_t total_device; | uint64_t total_device; | ||||||
| uint64_t total_host;; | uint64_t total_host;; | ||||||
| void MemoryManager::PrintBytes(void) | void MemoryManager::PrintBytes(void) | ||||||
| { | { | ||||||
|   std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl; |   std::cout << " MemoryManager : ------------------------------------ "<<std::endl; | ||||||
|   std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl; |   std::cout << " MemoryManager : PrintBytes "<<std::endl; | ||||||
|   std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl; |   std::cout << " MemoryManager : ------------------------------------ "<<std::endl; | ||||||
|  |   std::cout << " MemoryManager : "<<(total_shared>>20)<<" shared      Mbytes "<<std::endl; | ||||||
|  |   std::cout << " MemoryManager : "<<(total_device>>20)<<" accelerator Mbytes "<<std::endl; | ||||||
|  |   std::cout << " MemoryManager : "<<(total_host>>20)  <<" cpu         Mbytes "<<std::endl; | ||||||
|  |   uint64_t cacheBytes; | ||||||
|  |   cacheBytes = CacheBytes[Cpu]; | ||||||
|  |   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" cpu cache Mbytes "<<std::endl; | ||||||
|  |   cacheBytes = CacheBytes[Acc]; | ||||||
|  |   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" acc cache Mbytes "<<std::endl; | ||||||
|  |   cacheBytes = CacheBytes[Shared]; | ||||||
|  |   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" shared cache Mbytes "<<std::endl; | ||||||
|  |    | ||||||
|  | #ifdef GRID_CUDA | ||||||
|  |   cuda_mem(); | ||||||
|  | #endif | ||||||
|  |    | ||||||
| } | } | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||||
| @@ -24,86 +40,114 @@ void MemoryManager::PrintBytes(void) | |||||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||||
| MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; | MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; | ||||||
| int MemoryManager::Victim[MemoryManager::NallocType]; | int MemoryManager::Victim[MemoryManager::NallocType]; | ||||||
| int MemoryManager::Ncache[MemoryManager::NallocType] = { 8, 32, 8, 32, 8, 32 }; | int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 }; | ||||||
|  | uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType]; | ||||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||||
| // Actual allocation and deallocation utils | // Actual allocation and deallocation utils | ||||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||||
| void *MemoryManager::AcceleratorAllocate(size_t bytes) | void *MemoryManager::AcceleratorAllocate(size_t bytes) | ||||||
| { | { | ||||||
|  |   total_device+=bytes; | ||||||
|   void *ptr = (void *) Lookup(bytes,Acc); |   void *ptr = (void *) Lookup(bytes,Acc); | ||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocDevice(bytes); |     ptr = (void *) acceleratorAllocDevice(bytes); | ||||||
|     total_device+=bytes; |  | ||||||
|   } |   } | ||||||
|  | #ifdef GRID_MM_VERBOSE | ||||||
|  |   std::cout <<"AcceleratorAllocate "<<std::endl; | ||||||
|  |   PrintBytes(); | ||||||
|  | #endif | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| void  MemoryManager::AcceleratorFree    (void *ptr,size_t bytes) | void  MemoryManager::AcceleratorFree    (void *ptr,size_t bytes) | ||||||
| { | { | ||||||
|  |   total_device-=bytes; | ||||||
|   void *__freeme = Insert(ptr,bytes,Acc); |   void *__freeme = Insert(ptr,bytes,Acc); | ||||||
|   if ( __freeme ) { |   if ( __freeme ) { | ||||||
|     acceleratorFreeDevice(__freeme); |     acceleratorFreeDevice(__freeme); | ||||||
|     total_device-=bytes; |  | ||||||
|     //    PrintBytes(); |  | ||||||
|   } |   } | ||||||
|  | #ifdef GRID_MM_VERBOSE | ||||||
|  |   std::cout <<"AcceleratorFree "<<std::endl; | ||||||
|  |   PrintBytes(); | ||||||
|  | #endif | ||||||
| } | } | ||||||
| void *MemoryManager::SharedAllocate(size_t bytes) | void *MemoryManager::SharedAllocate(size_t bytes) | ||||||
| { | { | ||||||
|  |   total_shared+=bytes; | ||||||
|   void *ptr = (void *) Lookup(bytes,Shared); |   void *ptr = (void *) Lookup(bytes,Shared); | ||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocShared(bytes); |     ptr = (void *) acceleratorAllocShared(bytes); | ||||||
|     total_shared+=bytes; |  | ||||||
|     //    std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl; |  | ||||||
|     //    PrintBytes(); |  | ||||||
|   } |   } | ||||||
|  | #ifdef GRID_MM_VERBOSE | ||||||
|  |   std::cout <<"SharedAllocate "<<std::endl; | ||||||
|  |   PrintBytes(); | ||||||
|  | #endif | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| void  MemoryManager::SharedFree    (void *ptr,size_t bytes) | void  MemoryManager::SharedFree    (void *ptr,size_t bytes) | ||||||
| { | { | ||||||
|  |   total_shared-=bytes; | ||||||
|   void *__freeme = Insert(ptr,bytes,Shared); |   void *__freeme = Insert(ptr,bytes,Shared); | ||||||
|   if ( __freeme ) { |   if ( __freeme ) { | ||||||
|     acceleratorFreeShared(__freeme); |     acceleratorFreeShared(__freeme); | ||||||
|     total_shared-=bytes; |  | ||||||
|     //    PrintBytes(); |  | ||||||
|   } |   } | ||||||
|  | #ifdef GRID_MM_VERBOSE | ||||||
|  |   std::cout <<"SharedFree "<<std::endl; | ||||||
|  |   PrintBytes(); | ||||||
|  | #endif | ||||||
| } | } | ||||||
| #ifdef GRID_UVM | #ifdef GRID_UVM | ||||||
| void *MemoryManager::CpuAllocate(size_t bytes) | void *MemoryManager::CpuAllocate(size_t bytes) | ||||||
| { | { | ||||||
|  |   total_host+=bytes; | ||||||
|   void *ptr = (void *) Lookup(bytes,Cpu); |   void *ptr = (void *) Lookup(bytes,Cpu); | ||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocShared(bytes); |     ptr = (void *) acceleratorAllocShared(bytes); | ||||||
|     total_host+=bytes; |  | ||||||
|   } |   } | ||||||
|  | #ifdef GRID_MM_VERBOSE | ||||||
|  |   std::cout <<"CpuAllocate "<<std::endl; | ||||||
|  |   PrintBytes(); | ||||||
|  | #endif | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | ||||||
| { | { | ||||||
|  |   total_host-=bytes; | ||||||
|   NotifyDeletion(_ptr); |   NotifyDeletion(_ptr); | ||||||
|   void *__freeme = Insert(_ptr,bytes,Cpu); |   void *__freeme = Insert(_ptr,bytes,Cpu); | ||||||
|   if ( __freeme ) {  |   if ( __freeme ) {  | ||||||
|     acceleratorFreeShared(__freeme); |     acceleratorFreeShared(__freeme); | ||||||
|     total_host-=bytes; |  | ||||||
|   } |   } | ||||||
|  | #ifdef GRID_MM_VERBOSE | ||||||
|  |   std::cout <<"CpuFree "<<std::endl; | ||||||
|  |   PrintBytes(); | ||||||
|  | #endif | ||||||
| } | } | ||||||
| #else | #else | ||||||
| void *MemoryManager::CpuAllocate(size_t bytes) | void *MemoryManager::CpuAllocate(size_t bytes) | ||||||
| { | { | ||||||
|  |   total_host+=bytes; | ||||||
|   void *ptr = (void *) Lookup(bytes,Cpu); |   void *ptr = (void *) Lookup(bytes,Cpu); | ||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocCpu(bytes); |     ptr = (void *) acceleratorAllocCpu(bytes); | ||||||
|     total_host+=bytes; |  | ||||||
|   } |   } | ||||||
|  | #ifdef GRID_MM_VERBOSE | ||||||
|  |   std::cout <<"CpuAllocate "<<std::endl; | ||||||
|  |   PrintBytes(); | ||||||
|  | #endif | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | ||||||
| { | { | ||||||
|  |   total_host-=bytes; | ||||||
|   NotifyDeletion(_ptr); |   NotifyDeletion(_ptr); | ||||||
|   void *__freeme = Insert(_ptr,bytes,Cpu); |   void *__freeme = Insert(_ptr,bytes,Cpu); | ||||||
|   if ( __freeme ) {  |   if ( __freeme ) {  | ||||||
|     acceleratorFreeCpu(__freeme); |     acceleratorFreeCpu(__freeme); | ||||||
|     total_host-=bytes; |  | ||||||
|   } |   } | ||||||
|  | #ifdef GRID_MM_VERBOSE | ||||||
|  |   std::cout <<"CpuFree "<<std::endl; | ||||||
|  |   PrintBytes(); | ||||||
|  | #endif | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| @@ -181,13 +225,13 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,int type) | |||||||
| #ifdef ALLOCATION_CACHE | #ifdef ALLOCATION_CACHE | ||||||
|   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); |   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); | ||||||
|   int cache = type + small; |   int cache = type + small; | ||||||
|   return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache]);   |   return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache],CacheBytes[cache]);   | ||||||
| #else | #else | ||||||
|   return ptr; |   return ptr; | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim)  | void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim, uint64_t &cacheBytes)  | ||||||
| { | { | ||||||
|   assert(ncache>0); |   assert(ncache>0); | ||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
| @@ -211,6 +255,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | |||||||
|  |  | ||||||
|   if ( entries[v].valid ) { |   if ( entries[v].valid ) { | ||||||
|     ret = entries[v].address; |     ret = entries[v].address; | ||||||
|  |     cacheBytes -= entries[v].bytes; | ||||||
|     entries[v].valid = 0; |     entries[v].valid = 0; | ||||||
|     entries[v].address = NULL; |     entries[v].address = NULL; | ||||||
|     entries[v].bytes = 0; |     entries[v].bytes = 0; | ||||||
| @@ -219,6 +264,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | |||||||
|   entries[v].address=ptr; |   entries[v].address=ptr; | ||||||
|   entries[v].bytes  =bytes; |   entries[v].bytes  =bytes; | ||||||
|   entries[v].valid  =1; |   entries[v].valid  =1; | ||||||
|  |   cacheBytes += bytes; | ||||||
|  |  | ||||||
|   return ret; |   return ret; | ||||||
| } | } | ||||||
| @@ -228,13 +274,13 @@ void *MemoryManager::Lookup(size_t bytes,int type) | |||||||
| #ifdef ALLOCATION_CACHE | #ifdef ALLOCATION_CACHE | ||||||
|   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); |   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); | ||||||
|   int cache = type+small; |   int cache = type+small; | ||||||
|   return Lookup(bytes,Entries[cache],Ncache[cache]); |   return Lookup(bytes,Entries[cache],Ncache[cache],CacheBytes[cache]); | ||||||
| #else | #else | ||||||
|   return NULL; |   return NULL; | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache)  | void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t & cacheBytes)  | ||||||
| { | { | ||||||
|   assert(ncache>0); |   assert(ncache>0); | ||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
| @@ -243,6 +289,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach | |||||||
|   for(int e=0;e<ncache;e++){ |   for(int e=0;e<ncache;e++){ | ||||||
|     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { |     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { | ||||||
|       entries[e].valid = 0; |       entries[e].valid = 0; | ||||||
|  |       cacheBytes -= entries[e].bytes; | ||||||
|       return entries[e].address; |       return entries[e].address; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -82,14 +82,15 @@ private: | |||||||
|   static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; |   static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; | ||||||
|   static int Victim[NallocType]; |   static int Victim[NallocType]; | ||||||
|   static int Ncache[NallocType]; |   static int Ncache[NallocType]; | ||||||
|  |   static uint64_t CacheBytes[NallocType]; | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////// |   ///////////////////////////////////////////////// | ||||||
|   // Free pool |   // Free pool | ||||||
|   ///////////////////////////////////////////////// |   ///////////////////////////////////////////////// | ||||||
|   static void *Insert(void *ptr,size_t bytes,int type) ; |   static void *Insert(void *ptr,size_t bytes,int type) ; | ||||||
|   static void *Lookup(size_t bytes,int type) ; |   static void *Lookup(size_t bytes,int type) ; | ||||||
|   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; |   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim,uint64_t &cbytes) ; | ||||||
|   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; |   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t &cbytes) ; | ||||||
|  |  | ||||||
|   static void PrintBytes(void); |   static void PrintBytes(void); | ||||||
|  public: |  public: | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ | |||||||
|  |  | ||||||
| #warning "Using explicit device memory copies" | #warning "Using explicit device memory copies" | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
| //define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout); | //#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout); | ||||||
| #define dprintf(...) | #define dprintf(...) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -429,6 +429,7 @@ void  MemoryManager::NotifyDeletion(void *_ptr) | |||||||
| } | } | ||||||
| void  MemoryManager::Print(void) | void  MemoryManager::Print(void) | ||||||
| { | { | ||||||
|  |   PrintBytes(); | ||||||
|   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; |   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; | ||||||
|   std::cout << GridLogDebug << "Memory Manager                             " << std::endl; |   std::cout << GridLogDebug << "Memory Manager                             " << std::endl; | ||||||
|   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; |   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; | ||||||
|   | |||||||
| @@ -33,6 +33,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | bool Stencil_force_mpi = true; | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
| // Info that is setup once and indept of cartesian layout | // Info that is setup once and indept of cartesian layout | ||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -35,6 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | extern bool Stencil_force_mpi ; | ||||||
|  |  | ||||||
| class CartesianCommunicator : public SharedMemory { | class CartesianCommunicator : public SharedMemory { | ||||||
|  |  | ||||||
| public:     | public:     | ||||||
|   | |||||||
| @@ -370,7 +370,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|   double off_node_bytes=0.0; |   double off_node_bytes=0.0; | ||||||
|   int tag; |   int tag; | ||||||
|  |  | ||||||
|   if ( gfrom ==MPI_UNDEFINED) { |   if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) { | ||||||
|     tag= dir+from*32; |     tag= dir+from*32; | ||||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); |     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
| @@ -378,12 +378,18 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( gdest == MPI_UNDEFINED ) { |   if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) { | ||||||
|     tag= dir+_processor*32; |     tag= dir+_processor*32; | ||||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); |     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(xrq); |     list.push_back(xrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|  |   } else { | ||||||
|  |     // TODO : make a OMP loop on CPU, call threaded bcopy | ||||||
|  |     void *shm = (void *) this->ShmBufferTranslate(dest,recv); | ||||||
|  |     assert(shm!=NULL); | ||||||
|  |     acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); | ||||||
|  |     acceleratorCopySynchronise(); // MPI prob slower | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicySequential ) { |   if ( CommunicatorPolicy == CommunicatorPolicySequential ) { | ||||||
|   | |||||||
| @@ -35,6 +35,9 @@ Author: Christoph Lehner <christoph@lhnr.de> | |||||||
| #endif | #endif | ||||||
| #ifdef GRID_HIP | #ifdef GRID_HIP | ||||||
| #include <hip/hip_runtime_api.h> | #include <hip/hip_runtime_api.h> | ||||||
|  | #endif | ||||||
|  | #ifdef GRID_SYCl | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid);  | NAMESPACE_BEGIN(Grid);  | ||||||
| @@ -70,6 +73,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm) | |||||||
|   WorldNodes = WorldSize/WorldShmSize; |   WorldNodes = WorldSize/WorldShmSize; | ||||||
|   assert( (WorldNodes * WorldShmSize) == WorldSize ); |   assert( (WorldNodes * WorldShmSize) == WorldSize ); | ||||||
|  |  | ||||||
|  |  | ||||||
|   // FIXME: Check all WorldShmSize are the same ? |   // FIXME: Check all WorldShmSize are the same ? | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////////////////// | ||||||
| @@ -446,7 +450,47 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
| //////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Hugetlbfs mapping intended | // Hugetlbfs mapping intended | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| #if defined(GRID_CUDA) ||defined(GRID_HIP) | #if defined(GRID_CUDA) ||defined(GRID_HIP)  || defined(GRID_SYCL) | ||||||
|  |  | ||||||
|  | //if defined(GRID_SYCL) | ||||||
|  | #if 0 | ||||||
|  | void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||||
|  | { | ||||||
|  |   void * ShmCommBuf ;  | ||||||
|  |   assert(_ShmSetup==1); | ||||||
|  |   assert(_ShmAlloc==0); | ||||||
|  |  | ||||||
|  |   ////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // allocate the pointer array for shared windows for our group | ||||||
|  |   ////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   MPI_Barrier(WorldShmComm); | ||||||
|  |   WorldShmCommBufs.resize(WorldShmSize); | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Each MPI rank should allocate our own buffer | ||||||
|  |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  |   ShmCommBuf = acceleratorAllocDevice(bytes); | ||||||
|  |  | ||||||
|  |   if (ShmCommBuf == (void *)NULL ) { | ||||||
|  |     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; | ||||||
|  |     exit(EXIT_FAILURE);   | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||||
|  | 	    << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | ||||||
|  |  | ||||||
|  |   SharedMemoryZero(ShmCommBuf,bytes); | ||||||
|  |  | ||||||
|  |   assert(WorldShmSize == 1); | ||||||
|  |   for(int r=0;r<WorldShmSize;r++){ | ||||||
|  |     WorldShmCommBufs[r] = ShmCommBuf; | ||||||
|  |   } | ||||||
|  |   _ShmAllocBytes=bytes; | ||||||
|  |   _ShmAlloc=1; | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #if defined(GRID_CUDA) ||defined(GRID_HIP) ||defined(GRID_SYCL)   | ||||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||||
| { | { | ||||||
|   void * ShmCommBuf ;  |   void * ShmCommBuf ;  | ||||||
| @@ -470,18 +514,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|   // Each MPI rank should allocate our own buffer |   // Each MPI rank should allocate our own buffer | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   ShmCommBuf = acceleratorAllocDevice(bytes); |   ShmCommBuf = acceleratorAllocDevice(bytes); | ||||||
|  |  | ||||||
|   if (ShmCommBuf == (void *)NULL ) { |   if (ShmCommBuf == (void *)NULL ) { | ||||||
|     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; |     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; | ||||||
|     exit(EXIT_FAILURE);   |     exit(EXIT_FAILURE);   | ||||||
|   } |   } | ||||||
|   //  if ( WorldRank == 0 ){ |   if ( WorldRank == 0 ){ | ||||||
|   if ( 1 ){ |  | ||||||
|     std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  |     std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||||
| 	      << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | 	      << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | ||||||
|   } |   } | ||||||
|   SharedMemoryZero(ShmCommBuf,bytes); |   SharedMemoryZero(ShmCommBuf,bytes); | ||||||
|  |   std::cout<< "Setting up IPC"<<std::endl; | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Loop over ranks/gpu's on our node |   // Loop over ranks/gpu's on our node | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -491,6 +533,29 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|     // If it is me, pass around the IPC access key |     // If it is me, pass around the IPC access key | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|  |     void * thisBuf = ShmCommBuf; | ||||||
|  |     if(!Stencil_force_mpi) { | ||||||
|  | #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||||
|  |     typedef struct { int fd; pid_t pid ; } clone_mem_t; | ||||||
|  |  | ||||||
|  |     auto zeDevice    = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device()); | ||||||
|  |     auto zeContext   = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context()); | ||||||
|  |        | ||||||
|  |     ze_ipc_mem_handle_t ihandle; | ||||||
|  |     clone_mem_t handle; | ||||||
|  |  | ||||||
|  |     if ( r==WorldShmRank ) {  | ||||||
|  |       auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle); | ||||||
|  |       if ( err != ZE_RESULT_SUCCESS ) { | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||||
|  | 	exit(EXIT_FAILURE); | ||||||
|  |       } else { | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||||
|  |       } | ||||||
|  |       memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int)); | ||||||
|  |       handle.pid = getpid(); | ||||||
|  |     } | ||||||
|  | #endif | ||||||
| #ifdef GRID_CUDA | #ifdef GRID_CUDA | ||||||
|     cudaIpcMemHandle_t handle; |     cudaIpcMemHandle_t handle; | ||||||
|     if ( r==WorldShmRank ) {  |     if ( r==WorldShmRank ) {  | ||||||
| @@ -511,6 +576,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|     // Share this IPC handle across the Shm Comm |     // Share this IPC handle across the Shm Comm | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
| @@ -526,7 +592,35 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     // If I am not the source, overwrite thisBuf with remote buffer |     // If I am not the source, overwrite thisBuf with remote buffer | ||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     void * thisBuf = ShmCommBuf; |  | ||||||
|  | #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||||
|  |     if ( r!=WorldShmRank ) { | ||||||
|  |       thisBuf = nullptr; | ||||||
|  |       std::cout<<"mapping seeking remote pid/fd " | ||||||
|  | 	       <<handle.pid<<"/" | ||||||
|  | 	       <<handle.fd<<std::endl; | ||||||
|  |  | ||||||
|  |       int pidfd = syscall(SYS_pidfd_open,handle.pid,0); | ||||||
|  |       std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n"; | ||||||
|  |       //      int myfd  = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0); | ||||||
|  |       int myfd  = syscall(438,pidfd,handle.fd,0); | ||||||
|  |  | ||||||
|  |       std::cout<<"Using IpcHandle myfd "<<myfd<<"\n"; | ||||||
|  |        | ||||||
|  |       memcpy((void *)&ihandle,(void *)&myfd,sizeof(int)); | ||||||
|  |  | ||||||
|  |       auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf); | ||||||
|  |       if ( err != ZE_RESULT_SUCCESS ) { | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl; | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;  | ||||||
|  | 	exit(EXIT_FAILURE); | ||||||
|  |       } else { | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl; | ||||||
|  | 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle pointer is "<<std::hex<<thisBuf<<std::dec<<std::endl; | ||||||
|  |       } | ||||||
|  |       assert(thisBuf!=nullptr); | ||||||
|  |     } | ||||||
|  | #endif | ||||||
| #ifdef GRID_CUDA | #ifdef GRID_CUDA | ||||||
|     if ( r!=WorldShmRank ) {  |     if ( r!=WorldShmRank ) {  | ||||||
|       auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); |       auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); | ||||||
| @@ -548,6 +642,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     // Save a copy of the device buffers |     // Save a copy of the device buffers | ||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|  |     } | ||||||
|     WorldShmCommBufs[r] = thisBuf; |     WorldShmCommBufs[r] = thisBuf; | ||||||
| #else | #else | ||||||
|     WorldShmCommBufs[r] = ShmCommBuf; |     WorldShmCommBufs[r] = ShmCommBuf; | ||||||
| @@ -557,6 +652,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|   _ShmAllocBytes=bytes; |   _ShmAllocBytes=bytes; | ||||||
|   _ShmAlloc=1; |   _ShmAlloc=1; | ||||||
| } | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
| #else  | #else  | ||||||
| #ifdef GRID_MPI3_SHMMMAP | #ifdef GRID_MPI3_SHMMMAP | ||||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||||
| @@ -727,16 +824,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
| ///////////////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////////////// | ||||||
| void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes) | void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes) | ||||||
| { | { | ||||||
| #ifdef GRID_CUDA | #if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL) | ||||||
|   cudaMemset(dest,0,bytes); |   acceleratorMemSet(dest,0,bytes); | ||||||
| #else | #else | ||||||
|   bzero(dest,bytes); |   bzero(dest,bytes); | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
| void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) | void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) | ||||||
| { | { | ||||||
| #ifdef GRID_CUDA | #if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL) | ||||||
|   cudaMemcpy(dest,src,bytes,cudaMemcpyDefault); |   acceleratorCopyToDevice(src,dest,bytes); | ||||||
| #else    | #else    | ||||||
|   bcopy(src,dest,bytes); |   bcopy(src,dest,bytes); | ||||||
| #endif | #endif | ||||||
| @@ -800,7 +897,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) | |||||||
|   } |   } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   SharedMemoryTest(); |   //SharedMemoryTest(); | ||||||
| } | } | ||||||
| ////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////// | ||||||
| // On node barrier | // On node barrier | ||||||
|   | |||||||
| @@ -46,3 +46,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/lattice/Lattice_unary.h> | #include <Grid/lattice/Lattice_unary.h> | ||||||
| #include <Grid/lattice/Lattice_transfer.h> | #include <Grid/lattice/Lattice_transfer.h> | ||||||
| #include <Grid/lattice/Lattice_basis.h> | #include <Grid/lattice/Lattice_basis.h> | ||||||
|  | #include <Grid/lattice/Lattice_crc.h> | ||||||
|   | |||||||
| @@ -225,7 +225,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> & | |||||||
|   autoView( x_v , x, AcceleratorRead); |   autoView( x_v , x, AcceleratorRead); | ||||||
|   autoView( y_v , y, AcceleratorRead); |   autoView( y_v , y, AcceleratorRead); | ||||||
|   accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ |   accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ | ||||||
|     auto tmp = a*x_v(ss)+y_v(ss); |     auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]); | ||||||
|     coalescedWrite(ret_v[ss],tmp); |     coalescedWrite(ret_v[ss],tmp); | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -125,7 +125,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | |||||||
|  |  | ||||||
| 	for(int k=k0; k<k1; ++k){ | 	for(int k=k0; k<k1; ++k){ | ||||||
| 	  auto tmp = coalescedRead(Bp[ss*nrot+j]); | 	  auto tmp = coalescedRead(Bp[ss*nrot+j]); | ||||||
| 	  coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss])); | 	  coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss])); | ||||||
| 	} | 	} | ||||||
|       }); |       }); | ||||||
|  |  | ||||||
| @@ -134,7 +134,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | |||||||
| 	int jj  =j0+j; | 	int jj  =j0+j; | ||||||
| 	int ss =sj/nrot; | 	int ss =sj/nrot; | ||||||
| 	int sss=ss+s; | 	int sss=ss+s; | ||||||
| 	coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); | 	coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j])); | ||||||
|       }); |       }); | ||||||
|   } |   } | ||||||
| #endif | #endif | ||||||
|   | |||||||
							
								
								
									
										55
									
								
								Grid/lattice/Lattice_crc.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								Grid/lattice/Lattice_crc.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./Grid/lattice/Lattice_crc.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2021 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | // Print one line per slice, "<s> <t> <value>", where the values are produced by sliceSum() applied to localNorm2(f). | ||||||
|  | // mu selects the direction handed to sliceSum(); the default mu==-1 picks the last dimension of f's grid. | ||||||
|  | template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1) | ||||||
|  | { | ||||||
|  |   typedef typename vobj::tensor_reduced normtype; | ||||||
|  |   typedef typename normtype::scalar_object scalar; | ||||||
|  |  | ||||||
|  |   auto site_norm2 = localNorm2(f); | ||||||
|  |   const int orthog = ( mu==-1 ) ? f.Grid()->Nd()-1 : mu; | ||||||
|  |  | ||||||
|  |   std::vector<scalar> slice_norm2; | ||||||
|  |   sliceSum(site_norm2,slice_norm2,orthog); | ||||||
|  |  | ||||||
|  |   int t=0; | ||||||
|  |   for(const auto &slice : slice_norm2){ | ||||||
|  |     std::cout << s<<" "<<t<<" "<<slice<<std::endl; | ||||||
|  |     t++; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Fingerprint of a lattice: zlib crc32 over sizeof(vobj)*oSites() bytes, read through a CpuRead view. | ||||||
|  | // zlib's crc32() takes its length as a uInt, so the buffer is fed in bounded chunks; | ||||||
|  | // a single call would silently truncate the length of a buffer too large for a uInt. | ||||||
|  | template<class vobj> uint32_t crc(Lattice<vobj> & buf) | ||||||
|  | { | ||||||
|  |   autoView( buf_v , buf, CpuRead); | ||||||
|  |   const unsigned char *ptr = (const unsigned char *)&buf_v[0]; | ||||||
|  |   size_t bytes = (size_t)sizeof(vobj)*buf.oSites(); | ||||||
|  |   const size_t chunk = ((size_t)1)<<30; // per-call byte count, chosen to fit in a uInt | ||||||
|  |   uLong sum = 0L;                       // crc32 start value | ||||||
|  |   while ( bytes > 0 ) { | ||||||
|  |     size_t n = (bytes < chunk) ? bytes : chunk; | ||||||
|  |     sum = ::crc32(sum,ptr,(uInt)n);     // running CRC: feed the previous value back in | ||||||
|  |     ptr   += n; | ||||||
|  |     bytes -= n; | ||||||
|  |   } | ||||||
|  |   return (uint32_t)sum; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Print "FingerPrint <file> <line> <expression> <crc>" for lattice U. | ||||||
|  | // Note: the expansion already ends in ';', so "CRC(U);" leaves an extra empty statement. | ||||||
|  | #define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl; | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -361,6 +361,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|   // But easily avoided by using double precision fields |   // But easily avoided by using double precision fields | ||||||
|   /////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////// | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_object::scalar_type scalar_type; | ||||||
|   GridBase  *grid = Data.Grid(); |   GridBase  *grid = Data.Grid(); | ||||||
|   assert(grid!=NULL); |   assert(grid!=NULL); | ||||||
|  |  | ||||||
| @@ -419,20 +420,19 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   // sum over nodes. |   // sum over nodes. | ||||||
|   sobj gsum; |  | ||||||
|   for(int t=0;t<fd;t++){ |   for(int t=0;t<fd;t++){ | ||||||
|     int pt = t/ld; // processor plane |     int pt = t/ld; // processor plane | ||||||
|     int lt = t%ld; |     int lt = t%ld; | ||||||
|     if ( pt == grid->_processor_coor[orthogdim] ) { |     if ( pt == grid->_processor_coor[orthogdim] ) { | ||||||
|       gsum=lsSum[lt]; |       result[t]=lsSum[lt]; | ||||||
|     } else { |     } else { | ||||||
|       gsum=Zero(); |       result[t]=Zero(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     grid->GlobalSum(gsum); |  | ||||||
|  |  | ||||||
|     result[t]=gsum; |  | ||||||
|   } |   } | ||||||
|  |   scalar_type * ptr = (scalar_type *) &result[0]; | ||||||
|  |   int words = fd*sizeof(sobj)/sizeof(scalar_type); | ||||||
|  |   grid->GlobalSumVector(ptr, words); | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
|   | |||||||
| @@ -32,8 +32,9 @@ | |||||||
| #include <random> | #include <random> | ||||||
|  |  | ||||||
| #ifdef RNG_SITMO | #ifdef RNG_SITMO | ||||||
| #include <Grid/sitmo_rng/sitmo_prng_engine.hpp> | #include <Grid/random/sitmo_prng_engine.hpp> | ||||||
| #endif  | #endif  | ||||||
|  | #include <Grid/random/gaussian.h> | ||||||
|  |  | ||||||
| #if defined(RNG_SITMO) | #if defined(RNG_SITMO) | ||||||
| #define RNG_FAST_DISCARD | #define RNG_FAST_DISCARD | ||||||
| @@ -142,7 +143,7 @@ public: | |||||||
|  |  | ||||||
|   std::vector<RngEngine>                             _generators; |   std::vector<RngEngine>                             _generators; | ||||||
|   std::vector<std::uniform_real_distribution<RealD> > _uniform; |   std::vector<std::uniform_real_distribution<RealD> > _uniform; | ||||||
|   std::vector<std::normal_distribution<RealD> >       _gaussian; |   std::vector<Grid::gaussian_distribution<RealD> >    _gaussian; | ||||||
|   std::vector<std::discrete_distribution<int32_t> >   _bernoulli; |   std::vector<std::discrete_distribution<int32_t> >   _bernoulli; | ||||||
|   std::vector<std::uniform_int_distribution<uint32_t> > _uid; |   std::vector<std::uniform_int_distribution<uint32_t> > _uid; | ||||||
|  |  | ||||||
| @@ -243,7 +244,7 @@ public: | |||||||
|   GridSerialRNG() : GridRNGbase() { |   GridSerialRNG() : GridRNGbase() { | ||||||
|     _generators.resize(1); |     _generators.resize(1); | ||||||
|     _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1}); |     _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1}); | ||||||
|     _gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) ); |     _gaussian.resize(1,gaussian_distribution<RealD>(0.0,1.0) ); | ||||||
|     _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1}); |     _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1}); | ||||||
|     _uid.resize(1,std::uniform_int_distribution<uint32_t>() ); |     _uid.resize(1,std::uniform_int_distribution<uint32_t>() ); | ||||||
|   } |   } | ||||||
| @@ -357,7 +358,7 @@ public: | |||||||
|  |  | ||||||
|     _generators.resize(_vol); |     _generators.resize(_vol); | ||||||
|     _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); |     _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); | ||||||
|     _gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) ); |     _gaussian.resize(_vol,gaussian_distribution<RealD>(0.0,1.0) ); | ||||||
|     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); |     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); | ||||||
|     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); |     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -364,15 +364,21 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
|   autoView( coarseData_ , coarseData, AcceleratorWrite); |   autoView( coarseData_ , coarseData, AcceleratorWrite); | ||||||
|   autoView( fineData_   , fineData, AcceleratorRead); |   autoView( fineData_   , fineData, AcceleratorRead); | ||||||
|  |  | ||||||
|  |   auto coarseData_p = &coarseData_[0]; | ||||||
|  |   auto fineData_p = &fineData_[0]; | ||||||
|  |    | ||||||
|   Coordinate fine_rdimensions = fine->_rdimensions; |   Coordinate fine_rdimensions = fine->_rdimensions; | ||||||
|   Coordinate coarse_rdimensions = coarse->_rdimensions; |   Coordinate coarse_rdimensions = coarse->_rdimensions; | ||||||
|  |  | ||||||
|  |   vobj zz = Zero(); | ||||||
|  |    | ||||||
|   accelerator_for(sc,coarse->oSites(),1,{ |   accelerator_for(sc,coarse->oSites(),1,{ | ||||||
|  |  | ||||||
|       // One thread per sub block |       // One thread per sub block | ||||||
|       Coordinate coor_c(_ndimension); |       Coordinate coor_c(_ndimension); | ||||||
|       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate |       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate | ||||||
|       coarseData_[sc]=Zero(); |  | ||||||
|  |       vobj cd = zz; | ||||||
|        |        | ||||||
|       for(int sb=0;sb<blockVol;sb++){ |       for(int sb=0;sb<blockVol;sb++){ | ||||||
|  |  | ||||||
| @@ -383,9 +389,11 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
| 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | ||||||
| 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | ||||||
|  |  | ||||||
| 	coarseData_[sc]=coarseData_[sc]+fineData_[sf]; | 	cd=cd+fineData_p[sf]; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|  |       coarseData_p[sc] = cd; | ||||||
|  |  | ||||||
|     }); |     }); | ||||||
|   return; |   return; | ||||||
| } | } | ||||||
| @@ -777,7 +785,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int | |||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||||
| { | { | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
| @@ -1002,54 +1010,96 @@ vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out) | |||||||
|   }); |   }); | ||||||
| } | } | ||||||
|  |  | ||||||
| //Convert a Lattice from one precision to another | //The workspace for a precision change operation allowing for the reuse of the mapping to save time on subsequent calls | ||||||
| template<class VobjOut, class VobjIn> | class precisionChangeWorkspace{ | ||||||
| void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in) |   std::pair<Integer,Integer>* fmap_device; //device pointer | ||||||
| { | public: | ||||||
|   assert(out.Grid()->Nd() == in.Grid()->Nd()); |   precisionChangeWorkspace(GridBase *out_grid, GridBase *in_grid){ | ||||||
|   for(int d=0;d<out.Grid()->Nd();d++){ |     //Build a map between the sites and lanes of the output field and the input field as we cannot use the Grids on the device | ||||||
|     assert(out.Grid()->FullDimensions()[d] == in.Grid()->FullDimensions()[d]); |     assert(out_grid->Nd() == in_grid->Nd()); | ||||||
|  |     for(int d=0;d<out_grid->Nd();d++){ | ||||||
|  |       assert(out_grid->FullDimensions()[d] == in_grid->FullDimensions()[d]); | ||||||
|     } |     } | ||||||
|   out.Checkerboard() = in.Checkerboard(); |     int Nsimd_out = out_grid->Nsimd(); | ||||||
|   GridBase *in_grid=in.Grid(); |  | ||||||
|   GridBase *out_grid = out.Grid(); |  | ||||||
|  |  | ||||||
|   typedef typename VobjOut::scalar_object SobjOut; |     std::vector<Coordinate> out_icorrs(out_grid->Nsimd()); //reuse these | ||||||
|   typedef typename VobjIn::scalar_object SobjIn; |     for(int lane=0; lane < out_grid->Nsimd(); lane++) | ||||||
|  |       out_grid->iCoorFromIindex(out_icorrs[lane], lane); | ||||||
|    |    | ||||||
|   int ndim = out.Grid()->Nd(); |     std::vector<std::pair<Integer,Integer> > fmap_host(out_grid->lSites()); //lsites = osites*Nsimd | ||||||
|   int out_nsimd = out_grid->Nsimd(); |  | ||||||
|      |  | ||||||
|   std::vector<Coordinate > out_icoor(out_nsimd); |  | ||||||
|        |  | ||||||
|   for(int lane=0; lane < out_nsimd; lane++){ |  | ||||||
|     out_icoor[lane].resize(ndim); |  | ||||||
|     out_grid->iCoorFromIindex(out_icoor[lane], lane); |  | ||||||
|   } |  | ||||||
|          |  | ||||||
|   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); |  | ||||||
|   unvectorizeToLexOrdArray(in_slex_conv, in); |  | ||||||
|      |  | ||||||
|   autoView( out_v , out, CpuWrite); |  | ||||||
|     thread_for(out_oidx,out_grid->oSites(),{ |     thread_for(out_oidx,out_grid->oSites(),{ | ||||||
|     Coordinate out_ocoor(ndim); | 	Coordinate out_ocorr;  | ||||||
|     out_grid->oCoorFromOindex(out_ocoor, out_oidx); | 	out_grid->oCoorFromOindex(out_ocorr, out_oidx); | ||||||
|        |        | ||||||
|     ExtractPointerArray<SobjOut> ptrs(out_nsimd);       | 	Coordinate lcorr; //the local coordinate (common to both in and out as full coordinate) | ||||||
|  | 	for(int out_lane=0; out_lane < Nsimd_out; out_lane++){ | ||||||
|  | 	  out_grid->InOutCoorToLocalCoor(out_ocorr, out_icorrs[out_lane], lcorr); | ||||||
| 	 | 	 | ||||||
|     Coordinate lcoor(out_grid->Nd()); | 	  //int in_oidx = in_grid->oIndex(lcorr), in_lane = in_grid->iIndex(lcorr); | ||||||
|        | 	  //Note oIndex and OcorrFromOindex (and same for iIndex) are not inverse for checkerboarded lattice, the former coordinates being defined on the full lattice and the latter on the reduced lattice | ||||||
|     for(int lane=0; lane < out_nsimd; lane++){ | 	  //Until this is fixed we need to circumvent the problem locally. Here I will use the coordinates defined on the reduced lattice for simplicity | ||||||
|       for(int mu=0;mu<ndim;mu++) | 	  int in_oidx = 0, in_lane = 0; | ||||||
| 	lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu]; | 	  for(int d=0;d<in_grid->_ndimension;d++){ | ||||||
| 	 | 	    in_oidx += in_grid->_ostride[d] * ( lcorr[d] % in_grid->_rdimensions[d] ); | ||||||
|       int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions); | 	    in_lane += in_grid->_istride[d] * ( lcorr[d] / in_grid->_rdimensions[d] ); | ||||||
|       ptrs[lane] = &in_slex_conv[llex]; | 	  } | ||||||
|  | 	  fmap_host[out_lane + Nsimd_out*out_oidx] = std::pair<Integer,Integer>( in_oidx, in_lane ); | ||||||
|  | 	} | ||||||
|  |       }); | ||||||
|  |  | ||||||
|  |     //Copy the map to the device (if we had a way to tell if an accelerator is in use we could avoid this copy for CPU-only machines) | ||||||
|  |     size_t fmap_bytes = out_grid->lSites() * sizeof(std::pair<Integer,Integer>); | ||||||
|  |     fmap_device = (std::pair<Integer,Integer>*)acceleratorAllocDevice(fmap_bytes); | ||||||
|  |     acceleratorCopyToDevice(fmap_host.data(), fmap_device, fmap_bytes);  | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   //Prevent moving or copying | ||||||
|  |   precisionChangeWorkspace(const precisionChangeWorkspace &r) = delete; | ||||||
|  |   precisionChangeWorkspace(precisionChangeWorkspace &&r) = delete; | ||||||
|  |   precisionChangeWorkspace &operator=(const precisionChangeWorkspace &r) = delete; | ||||||
|  |   precisionChangeWorkspace &operator=(precisionChangeWorkspace &&r) = delete; | ||||||
|  |    | ||||||
|  |   std::pair<Integer,Integer> const* getMap() const{ return fmap_device; } | ||||||
|  |  | ||||||
|  |   ~precisionChangeWorkspace(){ | ||||||
|  |     acceleratorFreeDevice(fmap_device); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | //Convert a lattice of one precision to another. The input workspace contains the mapping data. | ||||||
|  | template<class VobjOut, class VobjIn> | ||||||
|  | void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in, const precisionChangeWorkspace &workspace){ | ||||||
|  |   static_assert( std::is_same<typename VobjOut::DoublePrecision, typename VobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same | ||||||
|  |  | ||||||
|  |   out.Checkerboard() = in.Checkerboard(); | ||||||
|  |   constexpr int Nsimd_out = VobjOut::Nsimd(); | ||||||
|  |  | ||||||
|  |   std::pair<Integer,Integer> const* fmap_device = workspace.getMap(); | ||||||
|  |  | ||||||
|  |   //Do the copy/precision change | ||||||
|  |   autoView( out_v , out, AcceleratorWrite); | ||||||
|  |   autoView( in_v , in, AcceleratorRead); | ||||||
|  |  | ||||||
|  |   accelerator_for(out_oidx, out.Grid()->oSites(), 1,{ | ||||||
|  |       std::pair<Integer,Integer> const* fmap_osite = fmap_device + out_oidx*Nsimd_out; | ||||||
|  |       for(int out_lane=0; out_lane < Nsimd_out; out_lane++){       | ||||||
|  | 	int in_oidx = fmap_osite[out_lane].first; | ||||||
|  | 	int in_lane = fmap_osite[out_lane].second; | ||||||
|  | 	copyLane(out_v[out_oidx], out_lane, in_v[in_oidx], in_lane); | ||||||
|       } |       } | ||||||
|     merge(out_v[out_oidx], ptrs, 0); |  | ||||||
|     }); |     }); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | //Precision-changing copy of 'in' into 'out' for callers that do not hold a workspace. | ||||||
|  | //A temporary precisionChangeWorkspace is built from the two grids for this call only; when the same mapping is needed repeatedly, construct the workspace once and call the three-argument overload instead | ||||||
|  | template<class VobjOut, class VobjIn> | ||||||
|  | void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | ||||||
|  |   GridBase *out_grid = out.Grid(); | ||||||
|  |   GridBase *in_grid  = in.Grid(); | ||||||
|  |   precisionChangeWorkspace scratch_map(out_grid, in_grid); | ||||||
|  |   precisionChange(out, in, scratch_map); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||||
| // Communicate between grids | // Communicate between grids | ||||||
| //////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -69,6 +69,7 @@ GridLogger GridLogDebug  (1, "Debug", GridLogColours, "PURPLE"); | |||||||
| GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); | GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); | ||||||
| GridLogger GridLogIterative  (1, "Iterative", GridLogColours, "BLUE"); | GridLogger GridLogIterative  (1, "Iterative", GridLogColours, "BLUE"); | ||||||
| GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); | GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); | ||||||
|  | GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE"); | ||||||
|  |  | ||||||
| void GridLogConfigure(std::vector<std::string> &logstreams) { | void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||||
|   GridLogError.Active(0); |   GridLogError.Active(0); | ||||||
| @@ -79,6 +80,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | |||||||
|   GridLogPerformance.Active(0); |   GridLogPerformance.Active(0); | ||||||
|   GridLogIntegrator.Active(1); |   GridLogIntegrator.Active(1); | ||||||
|   GridLogColours.Active(0); |   GridLogColours.Active(0); | ||||||
|  |   GridLogHMC.Active(1); | ||||||
|  |  | ||||||
|   for (int i = 0; i < logstreams.size(); i++) { |   for (int i = 0; i < logstreams.size(); i++) { | ||||||
|     if (logstreams[i] == std::string("Error"))       GridLogError.Active(1); |     if (logstreams[i] == std::string("Error"))       GridLogError.Active(1); | ||||||
| @@ -87,7 +89,8 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | |||||||
|     if (logstreams[i] == std::string("Iterative"))   GridLogIterative.Active(1); |     if (logstreams[i] == std::string("Iterative"))   GridLogIterative.Active(1); | ||||||
|     if (logstreams[i] == std::string("Debug"))       GridLogDebug.Active(1); |     if (logstreams[i] == std::string("Debug"))       GridLogDebug.Active(1); | ||||||
|     if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1); |     if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1); | ||||||
|     if (logstreams[i] == std::string("Integrator"))  GridLogIntegrator.Active(1); |     if (logstreams[i] == std::string("NoIntegrator"))  GridLogIntegrator.Active(0); | ||||||
|  |     if (logstreams[i] == std::string("NoHMC"))         GridLogHMC.Active(0); | ||||||
|     if (logstreams[i] == std::string("Colours"))     GridLogColours.Active(1); |     if (logstreams[i] == std::string("Colours"))     GridLogColours.Active(1); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -182,6 +182,7 @@ extern GridLogger GridLogDebug  ; | |||||||
| extern GridLogger GridLogPerformance; | extern GridLogger GridLogPerformance; | ||||||
| extern GridLogger GridLogIterative  ; | extern GridLogger GridLogIterative  ; | ||||||
| extern GridLogger GridLogIntegrator  ; | extern GridLogger GridLogIntegrator  ; | ||||||
|  | extern GridLogger GridLogHMC; | ||||||
| extern Colours    GridLogColours; | extern Colours    GridLogColours; | ||||||
|  |  | ||||||
| std::string demangle(const char* name) ; | std::string demangle(const char* name) ; | ||||||
|   | |||||||
| @@ -39,9 +39,11 @@ using namespace Grid; | |||||||
| //////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||||
| class NerscIO : public BinaryIO {  | class NerscIO : public BinaryIO {  | ||||||
| public: | public: | ||||||
|  |  | ||||||
|   typedef Lattice<vLorentzColourMatrixD> GaugeField; |   typedef Lattice<vLorentzColourMatrixD> GaugeField; | ||||||
|  |  | ||||||
|  |   // Enable/disable exiting if the plaquette in the header does not match the value computed (default true) | ||||||
|  |   static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; } | ||||||
|  |  | ||||||
|   static inline void truncate(std::string file){ |   static inline void truncate(std::string file){ | ||||||
|     std::ofstream fout(file,std::ios::out); |     std::ofstream fout(file,std::ios::out); | ||||||
|   } |   } | ||||||
| @@ -198,7 +200,7 @@ public: | |||||||
|       std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl; |       std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl; | ||||||
|       exit(0); |       exit(0); | ||||||
|     } |     } | ||||||
|     assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); |     if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); | ||||||
|     assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); |     assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); | ||||||
|     assert(nersc_csum == header.checksum ); |     assert(nersc_csum == header.checksum ); | ||||||
|        |        | ||||||
|   | |||||||
| @@ -63,6 +63,7 @@ static constexpr int Ngp=2; // gparity index range | |||||||
| #define ColourIndex  (2) | #define ColourIndex  (2) | ||||||
| #define SpinIndex    (1) | #define SpinIndex    (1) | ||||||
| #define LorentzIndex (0) | #define LorentzIndex (0) | ||||||
|  | #define GparityFlavourIndex (0) | ||||||
|  |  | ||||||
| // Also should make these a named enum type | // Also should make these a named enum type | ||||||
| static constexpr int DaggerNo=0; | static constexpr int DaggerNo=0; | ||||||
| @@ -87,6 +88,8 @@ template<typename T> struct isCoarsened { | |||||||
| template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ; | template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ; | ||||||
| template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ; | template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ; | ||||||
|  |  | ||||||
|  | const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom! | ||||||
|  |  | ||||||
| // ChrisK very keen to add extra space for Gparity doubling. | // ChrisK very keen to add extra space for Gparity doubling. | ||||||
| // | // | ||||||
| // Also add domain wall index, in a way where Wilson operator  | // Also add domain wall index, in a way where Wilson operator  | ||||||
| @@ -101,6 +104,7 @@ template<typename vtype> using iSpinMatrix                = iScalar<iMatrix<iSca | |||||||
| template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ; | template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ; | ||||||
| template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >; | template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >; | ||||||
| template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ; | template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ; | ||||||
|  | template<typename vtype> using iLorentzVector             = iVector<iScalar<iScalar<vtype> >, Nd > ; | ||||||
| template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ; | template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ; | ||||||
| template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >; | template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >; | ||||||
| template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >; | template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >; | ||||||
| @@ -110,8 +114,10 @@ template<typename vtype> using iHalfSpinColourVector      = iScalar<iVector<iVec | |||||||
|     template<typename vtype> using iSpinColourSpinColourMatrix  = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >; |     template<typename vtype> using iSpinColourSpinColourMatrix  = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<typename vtype> using iGparityFlavourVector                = iVector<iScalar<iScalar<vtype> >, Ngp>; | ||||||
| template<typename vtype> using iGparitySpinColourVector       = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >; | template<typename vtype> using iGparitySpinColourVector       = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >; | ||||||
| template<typename vtype> using iGparityHalfSpinColourVector   = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >; | template<typename vtype> using iGparityHalfSpinColourVector   = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >; | ||||||
|  | template<typename vtype> using iGparityFlavourMatrix = iMatrix<iScalar<iScalar<vtype> >, Ngp>; | ||||||
|  |  | ||||||
| // Spin matrix | // Spin matrix | ||||||
| typedef iSpinMatrix<Complex  >          SpinMatrix; | typedef iSpinMatrix<Complex  >          SpinMatrix; | ||||||
| @@ -158,7 +164,16 @@ typedef iSpinColourSpinColourMatrix<vComplex >    vSpinColourSpinColourMatrix; | |||||||
| typedef iSpinColourSpinColourMatrix<vComplexF>    vSpinColourSpinColourMatrixF; | typedef iSpinColourSpinColourMatrix<vComplexF>    vSpinColourSpinColourMatrixF; | ||||||
| typedef iSpinColourSpinColourMatrix<vComplexD>    vSpinColourSpinColourMatrixD; | typedef iSpinColourSpinColourMatrix<vComplexD>    vSpinColourSpinColourMatrixD; | ||||||
|  |  | ||||||
| // LorentzColour | // LorentzVector | ||||||
|  | typedef iLorentzVector<Complex  > LorentzVector; | ||||||
|  | typedef iLorentzVector<ComplexF > LorentzVectorF; | ||||||
|  | typedef iLorentzVector<ComplexD > LorentzVectorD; | ||||||
|  |  | ||||||
|  | typedef iLorentzVector<vComplex > vLorentzVector; | ||||||
|  | typedef iLorentzVector<vComplexF> vLorentzVectorF; | ||||||
|  | typedef iLorentzVector<vComplexD> vLorentzVectorD; | ||||||
|  |  | ||||||
|  | // LorentzColourMatrix | ||||||
| typedef iLorentzColourMatrix<Complex  > LorentzColourMatrix; | typedef iLorentzColourMatrix<Complex  > LorentzColourMatrix; | ||||||
| typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF; | typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF; | ||||||
| typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD; | typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD; | ||||||
| @@ -176,6 +191,16 @@ typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix; | |||||||
| typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF; | typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF; | ||||||
| typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD; | typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD; | ||||||
|  |  | ||||||
|  | //G-parity flavour matrix | ||||||
|  | typedef iGparityFlavourMatrix<Complex> GparityFlavourMatrix; | ||||||
|  | typedef iGparityFlavourMatrix<ComplexF> GparityFlavourMatrixF; | ||||||
|  | typedef iGparityFlavourMatrix<ComplexD> GparityFlavourMatrixD; | ||||||
|  |  | ||||||
|  | typedef iGparityFlavourMatrix<vComplex> vGparityFlavourMatrix; | ||||||
|  | typedef iGparityFlavourMatrix<vComplexF> vGparityFlavourMatrixF; | ||||||
|  | typedef iGparityFlavourMatrix<vComplexD> vGparityFlavourMatrixD; | ||||||
|  |  | ||||||
|  |  | ||||||
| // Spin vector | // Spin vector | ||||||
| typedef iSpinVector<Complex >           SpinVector; | typedef iSpinVector<Complex >           SpinVector; | ||||||
| typedef iSpinVector<ComplexF>           SpinVectorF; | typedef iSpinVector<ComplexF>           SpinVectorF; | ||||||
| @@ -221,6 +246,16 @@ typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector; | |||||||
| typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF; | typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF; | ||||||
| typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD; | typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD; | ||||||
|  |  | ||||||
|  | //G-parity flavour vector | ||||||
|  | typedef iGparityFlavourVector<Complex >         GparityFlavourVector; | ||||||
|  | typedef iGparityFlavourVector<ComplexF>         GparityFlavourVectorF; | ||||||
|  | typedef iGparityFlavourVector<ComplexD>         GparityFlavourVectorD; | ||||||
|  |  | ||||||
|  | typedef iGparityFlavourVector<vComplex >         vGparityFlavourVector; | ||||||
|  | typedef iGparityFlavourVector<vComplexF>         vGparityFlavourVectorF; | ||||||
|  | typedef iGparityFlavourVector<vComplexD>         vGparityFlavourVectorD; | ||||||
|  |  | ||||||
|  |      | ||||||
| // singlets | // singlets | ||||||
| typedef iSinglet<Complex >         TComplex;     // FIXME This is painful. Tensor singlet complex type. | typedef iSinglet<Complex >         TComplex;     // FIXME This is painful. Tensor singlet complex type. | ||||||
| typedef iSinglet<ComplexF>         TComplexF;    // FIXME This is painful. Tensor singlet complex type. | typedef iSinglet<ComplexF>         TComplexF;    // FIXME This is painful. Tensor singlet complex type. | ||||||
| @@ -263,6 +298,10 @@ typedef Lattice<vLorentzColourMatrix>  LatticeLorentzColourMatrix; | |||||||
| typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF; | typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF; | ||||||
| typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD; | typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD; | ||||||
|  |  | ||||||
|  | typedef Lattice<vLorentzVector>  LatticeLorentzVector; | ||||||
|  | typedef Lattice<vLorentzVectorF> LatticeLorentzVectorF; | ||||||
|  | typedef Lattice<vLorentzVectorD> LatticeLorentzVectorD; | ||||||
|  |  | ||||||
| // DoubleStored gauge field | // DoubleStored gauge field | ||||||
| typedef Lattice<vDoubleStoredColourMatrix>  LatticeDoubleStoredColourMatrix; | typedef Lattice<vDoubleStoredColourMatrix>  LatticeDoubleStoredColourMatrix; | ||||||
| typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF; | typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF; | ||||||
|   | |||||||
| @@ -30,8 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #ifndef GRID_QCD_ACTION_H | #pragma once | ||||||
| #define GRID_QCD_ACTION_H |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| // Abstract base interface | // Abstract base interface | ||||||
| @@ -51,4 +50,4 @@ NAMESPACE_CHECK(Fermion); | |||||||
| #include <Grid/qcd/action/pseudofermion/PseudoFermion.h> | #include <Grid/qcd/action/pseudofermion/PseudoFermion.h> | ||||||
| NAMESPACE_CHECK(PseudoFermion); | NAMESPACE_CHECK(PseudoFermion); | ||||||
|  |  | ||||||
| #endif |  | ||||||
|   | |||||||
| @@ -40,6 +40,29 @@ class Action | |||||||
|  |  | ||||||
| public: | public: | ||||||
|   bool is_smeared = false; |   bool is_smeared = false; | ||||||
|  |   // ---- Force logging and timing instrumentation ---- | ||||||
|  |   // All accumulators start at zero so the accessors below are well defined | ||||||
|  |   // even when reset_timer() has not been called yet. | ||||||
|  |   RealD deriv_norm_sum = 0.0; // running sum of the 'nrm' values passed to deriv_log() | ||||||
|  |   RealD deriv_max_sum  = 0.0; // running sum of the 'max' values passed to deriv_log() | ||||||
|  |   int   deriv_num      = 0;   // number of deriv_log() calls since the last reset | ||||||
|  |   RealD deriv_us       = 0.0; // accumulated time between deriv_timer_start/stop | ||||||
|  |   RealD S_us           = 0.0; // accumulated time between S_timer_start/stop | ||||||
|  |   RealD refresh_us     = 0.0; // accumulated time between refresh_timer_start/stop | ||||||
|  |  | ||||||
|  |   // Clear every accumulator and the call counter. | ||||||
|  |   void  reset_timer(void)        { | ||||||
|  |     deriv_us = S_us = refresh_us = 0.0; | ||||||
|  |     deriv_num=0; | ||||||
|  |     deriv_norm_sum = deriv_max_sum=0.0; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Record one force evaluation. | ||||||
|  |   void  deriv_log(RealD nrm, RealD max) { deriv_max_sum+=max; deriv_norm_sum+=nrm; deriv_num++;} | ||||||
|  |  | ||||||
|  |   // Averages over the logged force evaluations. | ||||||
|  |   // NOTE: with deriv_num==0 these evaluate 0.0/0 (NaN); call only after deriv_log(). | ||||||
|  |   RealD deriv_max_average(void)         { return deriv_max_sum/deriv_num; } | ||||||
|  |   RealD deriv_norm_average(void)        { return deriv_norm_sum/deriv_num; } | ||||||
|  |  | ||||||
|  |   // Accumulated times. Each accessor returns its own accumulator | ||||||
|  |   // (S_timer and refresh_timer previously both returned deriv_us). | ||||||
|  |   RealD deriv_timer(void)        { return deriv_us; } | ||||||
|  |   RealD S_timer(void)            { return S_us; } | ||||||
|  |   RealD refresh_timer(void)      { return refresh_us; } | ||||||
|  |  | ||||||
|  |   // start subtracts usecond() and stop adds it back, so each start/stop pair | ||||||
|  |   // adds the elapsed usecond() difference to the accumulator. | ||||||
|  |   void deriv_timer_start(void)   { deriv_us-=usecond(); } | ||||||
|  |   void deriv_timer_stop(void)    { deriv_us+=usecond(); } | ||||||
|  |   void refresh_timer_start(void) { refresh_us-=usecond(); } | ||||||
|  |   void refresh_timer_stop(void)  { refresh_us+=usecond(); } | ||||||
|  |   void S_timer_start(void)       { S_us-=usecond(); } | ||||||
|  |   void S_timer_stop(void)        { S_us+=usecond(); } | ||||||
|   // Heatbath? |   // Heatbath? | ||||||
|   virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions |   virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions | ||||||
|   virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action |   virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action | ||||||
|   | |||||||
| @@ -58,6 +58,8 @@ NAMESPACE_CHECK(Scalar); | |||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| // Utility functions | // Utility functions | ||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
|  | #include <Grid/qcd/action/domains/Domains.h> | ||||||
|  |  | ||||||
| #include <Grid/qcd/utils/Metric.h> | #include <Grid/qcd/utils/Metric.h> | ||||||
| NAMESPACE_CHECK(Metric); | NAMESPACE_CHECK(Metric); | ||||||
| #include <Grid/qcd/utils/CovariantLaplacian.h> | #include <Grid/qcd/utils/CovariantLaplacian.h> | ||||||
|   | |||||||
| @@ -36,28 +36,34 @@ NAMESPACE_BEGIN(Grid); | |||||||
|  |  | ||||||
| // These can move into a params header and be given MacroMagic serialisation | // These can move into a params header and be given MacroMagic serialisation | ||||||
| struct GparityWilsonImplParams { | struct GparityWilsonImplParams { | ||||||
|   Coordinate twists; |   Coordinate twists; //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  | ||||||
|   GparityWilsonImplParams() : twists(Nd, 0) {}; |                      //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs | ||||||
|  |   bool locally_periodic; | ||||||
|  |   GparityWilsonImplParams() : twists(Nd, 0), locally_periodic(false) {}; | ||||||
| }; | }; | ||||||
|    |    | ||||||
| struct WilsonImplParams { | struct WilsonImplParams { | ||||||
|   bool overlapCommsCompute; |   bool overlapCommsCompute; | ||||||
|  |   bool locally_periodic; | ||||||
|   AcceleratorVector<Real,Nd> twist_n_2pi_L; |   AcceleratorVector<Real,Nd> twist_n_2pi_L; | ||||||
|   AcceleratorVector<Complex,Nd> boundary_phases; |   AcceleratorVector<Complex,Nd> boundary_phases; | ||||||
|   WilsonImplParams()  { |   WilsonImplParams()  { | ||||||
|     boundary_phases.resize(Nd, 1.0); |     boundary_phases.resize(Nd, 1.0); | ||||||
|       twist_n_2pi_L.resize(Nd, 0.0); |       twist_n_2pi_L.resize(Nd, 0.0); | ||||||
|  |       locally_periodic = false; | ||||||
|   }; |   }; | ||||||
|   WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) { |   WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) { | ||||||
|     twist_n_2pi_L.resize(Nd, 0.0); |     twist_n_2pi_L.resize(Nd, 0.0); | ||||||
|  |     locally_periodic = false; | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| struct StaggeredImplParams { | struct StaggeredImplParams { | ||||||
|   StaggeredImplParams()  {}; |   bool locally_periodic; | ||||||
|  |   StaggeredImplParams() : locally_periodic(false) {}; | ||||||
| }; | }; | ||||||
|    |    | ||||||
|   struct OneFlavourRationalParams : Serializable { | struct OneFlavourRationalParams : Serializable { | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,  |     GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,  | ||||||
| 				    RealD, lo,  | 				    RealD, lo,  | ||||||
| 				    RealD, hi,  | 				    RealD, hi,  | ||||||
| @@ -86,6 +92,50 @@ struct StaggeredImplParams { | |||||||
|         BoundsCheckFreq(_BoundsCheckFreq){}; |         BoundsCheckFreq(_BoundsCheckFreq){}; | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   /*Action parameters for the generalized rational action | ||||||
|  |     The approximation is for (M^dag M)^{1/inv_pow} | ||||||
|  |     where inv_pow is the denominator of the fractional power. | ||||||
|  |     Default inv_pow=2 for square root, making this equivalent to  | ||||||
|  |     the OneFlavourRational action | ||||||
|  |   */ | ||||||
|  |     struct RationalActionParams : Serializable { | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(RationalActionParams,  | ||||||
|  | 				    int, inv_pow, //denominator of the fractional power (2 = square root) | ||||||
|  | 				    RealD, lo, //low eigenvalue bound of rational approx | ||||||
|  | 				    RealD, hi, //high eigenvalue bound of rational approx | ||||||
|  | 				    int,   MaxIter,  //maximum iterations in msCG | ||||||
|  | 				    RealD, action_tolerance,  //msCG tolerance in action evaluation | ||||||
|  | 				    int,   action_degree, //degree of the rational approx in action evaluation | ||||||
|  | 				    RealD, md_tolerance,  //msCG tolerance in MD integration | ||||||
|  | 				    int,   md_degree, //degree of the rational approx in MD integration | ||||||
|  | 				    int,   precision, //precision of floating point arithmetic | ||||||
|  | 				    int,   BoundsCheckFreq); //frequency the approximation is tested (with Metropolis degree/tolerance); 0 disables the check | ||||||
|  |   // constructor: every argument has a default, so the struct is default-constructible | ||||||
|  |   RationalActionParams(int _inv_pow = 2, | ||||||
|  | 		       RealD _lo      = 0.0,  | ||||||
|  | 		       RealD _hi      = 1.0,  | ||||||
|  | 		       int _maxit     = 1000, | ||||||
|  | 		       RealD _action_tolerance      = 1.0e-8,  | ||||||
|  | 		       int _action_degree    = 10, | ||||||
|  | 		       RealD _md_tolerance      = 1.0e-8,  | ||||||
|  | 		       int _md_degree    = 10, | ||||||
|  | 		       int _precision = 64, | ||||||
|  | 		       int _BoundsCheckFreq=20) | ||||||
|  |     : inv_pow(_inv_pow),  | ||||||
|  |       lo(_lo), | ||||||
|  |       hi(_hi), | ||||||
|  |       MaxIter(_maxit), | ||||||
|  |       action_tolerance(_action_tolerance), | ||||||
|  |       action_degree(_action_degree), | ||||||
|  |       md_tolerance(_md_tolerance), | ||||||
|  |       md_degree(_md_degree), | ||||||
|  |       precision(_precision), | ||||||
|  |       BoundsCheckFreq(_BoundsCheckFreq){}; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |    | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -2,14 +2,12 @@ | |||||||
| 
 | 
 | ||||||
| Grid physics library, www.github.com/paboyle/Grid | Grid physics library, www.github.com/paboyle/Grid | ||||||
| 
 | 
 | ||||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.cc | Source file: ./lib/qcd/hmc/DDHMC.h | ||||||
| 
 | 
 | ||||||
| Copyright (C) 2015, 2020 | Copyright (C) 2021 | ||||||
| 
 | 
 | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | Author: Christopher Kelly | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Nils Meyer <nils.meyer@ur.de> Regensburg University |  | ||||||
| 
 | 
 | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -28,24 +26,27 @@ with this program; if not, write to the Free Software Foundation, Inc., | |||||||
| See the full license in the file "LICENSE" in the top level distribution | See the full license in the file "LICENSE" in the top level distribution | ||||||
| directory | directory | ||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | 			   /*  END LEGAL */ | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> |  | ||||||
| 
 |  | ||||||
| #ifndef AVX512 |  | ||||||
| #ifndef QPX |  | ||||||
| #ifndef A64FX |  | ||||||
| #ifndef A64FXFIXEDSIZE |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| 
 | 
 | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  | ////////////////////////////////////////////////////
 | ||||||
|  | // DDHMC filter with sub-block size B[mu]
 | ||||||
|  | ////////////////////////////////////////////////////
 | ||||||
| 
 | 
 | ||||||
| #include "impl.h" | template<typename MomentaField> | ||||||
| template class WilsonKernels<IMPLEMENTATION>; | struct DDHMCFilter: public MomentumFilterBase<MomentaField> | ||||||
|  | { | ||||||
|  |   Coordinate Block;  // sub-block size per direction, forwarded to DomainDecomposition | ||||||
|  |   int Width = 0;     // not read anywhere in this struct; initialised so that copying | ||||||
|  |                      // the filter never reads an indeterminate value | ||||||
|  |    | ||||||
|  |   DDHMCFilter(const Coordinate &_Block): Block(_Block) {} | ||||||
|  |  | ||||||
|  |   // Apply the DDHMC projection of DomainDecomposition to the momentum field P in place. | ||||||
|  |   void applyFilter(MomentaField &P) const override | ||||||
|  |   { | ||||||
|  |     DomainDecomposition Domains(Block); | ||||||
|  |     Domains.ProjectDDHMC(P); | ||||||
|  |   } | ||||||
|  | }; | ||||||
| 
 | 
 | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  | 
 | ||||||
							
								
								
									
										98
									
								
								Grid/qcd/action/domains/DirichletFilter.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								Grid/qcd/action/domains/DirichletFilter.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/qcd/action/domains/DirichletFilter.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2021 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | 			   /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////// | ||||||
|  | // Dirichlet filter with sub-block size B[mu] | ||||||
|  | //////////////////////////////////////////////////// | ||||||
|  | #pragma once  | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/action/domains/DomainDecomposition.h> | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  |  | ||||||
|  | // Momentum filter that zeroes, for every direction mu with Block[mu]!=0, the | ||||||
|  | // mu-links whose two end sites lie in different domains (Omega / OmegaBar) of | ||||||
|  | // the DomainDecomposition built from Block. | ||||||
|  | template<typename MomentaField> | ||||||
|  | struct DirichletFilter: public MomentumFilterBase<MomentaField> | ||||||
|  | { | ||||||
|  |   Coordinate Block;  // sub-block size per direction; 0 leaves that direction untouched | ||||||
|  |    | ||||||
|  |   DirichletFilter(const Coordinate &_Block): Block(_Block) {} | ||||||
|  |  | ||||||
|  |   // Edge detect using domain projectors; U is modified in place. | ||||||
|  |   void applyFilter (MomentaField &U) const override | ||||||
|  |   { | ||||||
|  |     DomainDecomposition Domains(Block); | ||||||
|  |     GridBase *grid = U.Grid(); | ||||||
|  |     assert(grid->Nd()==Nd); | ||||||
|  |  | ||||||
|  |     // Only the temporaries that are actually read are allocated; the unused | ||||||
|  |     // lattice-sized scratch fields of the previous version were dropped. | ||||||
|  |     LatticeInteger  face(grid); | ||||||
|  |     LatticeInteger  one(grid);   one = 1; | ||||||
|  |     LatticeInteger  zero(grid); zero = 0; | ||||||
|  |     LatticeInteger  omega(grid); | ||||||
|  |     LatticeInteger  omegabar(grid); | ||||||
|  |     LatticeInteger  tmp(grid); | ||||||
|  |  | ||||||
|  |     // Indicator fields: omega is 1 on domain 0, omegabar is 1 on domain 1. | ||||||
|  |     omega=one;    Domains.ProjectDomain(omega,0); | ||||||
|  |     omegabar=one; Domains.ProjectDomain(omegabar,1); | ||||||
|  |  | ||||||
|  |     typedef decltype(PeekIndex<LorentzIndex>(U,0)) MomentaLinkField; | ||||||
|  |     MomentaLinkField  Umu(grid); | ||||||
|  |     MomentaLinkField   zz(grid); zz=Zero(); | ||||||
|  |  | ||||||
|  |     for(int mu=0;mu<Nd;mu++){ | ||||||
|  |  | ||||||
|  |       if ( Block[mu]!=0 ) { | ||||||
|  |  | ||||||
|  | 	Umu = PeekIndex<LorentzIndex>(U,mu); | ||||||
|  |  | ||||||
|  | 	// Face where this site is in Omega and the shifted neighbour is in OmegaBar | ||||||
|  | 	// (assumes Cshift(f,mu,1) brings the +mu neighbour to this site - TODO confirm) | ||||||
|  | 	tmp = Cshift(omegabar,mu,1); | ||||||
|  | 	tmp = tmp + omega; | ||||||
|  | 	face = where(tmp == Integer(2),one,zero ); | ||||||
|  |  | ||||||
|  | 	// Face where this site is in OmegaBar and the shifted neighbour is in Omega | ||||||
|  | 	tmp = Cshift(omega,mu,1); | ||||||
|  | 	tmp = tmp + omegabar; | ||||||
|  | 	face = where(tmp == Integer(2),one,face ); | ||||||
|  |  | ||||||
|  | 	// Zero the mu-link on every detected face site | ||||||
|  | 	Umu = where(face,zz,Umu); | ||||||
|  |  | ||||||
|  | 	PokeIndex<LorentzIndex>(U, Umu, mu); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |    | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
							
								
								
									
										187
									
								
								Grid/qcd/action/domains/DomainDecomposition.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										187
									
								
								Grid/qcd/action/domains/DomainDecomposition.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,187 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/qcd/action/domains/DomainDecomposition.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2021 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | 			   /*  END LEGAL */ | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////// | ||||||
|  | // Domain decomposition with sub-block size B[mu] | ||||||
|  | //////////////////////////////////////////////////// | ||||||
|  | #pragma once  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  |  | ||||||
|  | // Splits the lattice into blocks of extent Block[mu] and provides | ||||||
|  | //  - ProjectDomain: projector onto block interiors (domain 0) or onto the | ||||||
|  | //    two boundary layers of each block (any other domain value) | ||||||
|  | //  - ProjectDDHMC : zeroing / damping of a gauge-like field near block boundaries | ||||||
|  | struct DomainDecomposition | ||||||
|  | { | ||||||
|  |   Coordinate Block;                     // block extent per direction; 0 disables the cut | ||||||
|  |   static constexpr RealD factor = 0.6;  // per-layer damping factor used by ProjectDDHMC | ||||||
|  |  | ||||||
|  |   DomainDecomposition(const Coordinate &_Block): Block(_Block){ assert(Block.size()==Nd);}; | ||||||
|  |    | ||||||
|  |   // Project f in place. | ||||||
|  |   //   domain == 0 : keep f on sites that are on no block boundary layer, zero elsewhere | ||||||
|  |   //   domain != 0 : keep f on sites with coor%B==0 or coor%B==B-1 in some cut direction | ||||||
|  |   // The grid may have Nd or Nd+1 dimensions; with Nd+1 the coordinate index is | ||||||
|  |   // shifted by one (presumably skipping a leading fifth dimension - TODO confirm). | ||||||
|  |   template<class Field> | ||||||
|  |   void ProjectDomain(Field &f,Integer domain) | ||||||
|  |   { | ||||||
|  |     GridBase *grid = f.Grid(); | ||||||
|  |     int dims = grid->Nd(); | ||||||
|  |     int isDWF= (dims==Nd+1); | ||||||
|  |     assert((dims==Nd)||(dims==Nd+1)); | ||||||
|  |  | ||||||
|  |     Field   zz(grid);  zz = Zero(); | ||||||
|  |     LatticeInteger coor(grid); | ||||||
|  |     LatticeInteger domaincoor(grid); | ||||||
|  |     LatticeInteger mask(grid); mask = Integer(1); | ||||||
|  |     LatticeInteger zi(grid);     zi = Integer(0); | ||||||
|  |     for(int d=0;d<Nd;d++){ | ||||||
|  |       Integer B= Block[d]; | ||||||
|  |       if ( B ) { | ||||||
|  | 	LatticeCoordinate(coor,d+isDWF); | ||||||
|  | 	domaincoor = mod(coor,B); | ||||||
|  | 	// clear the mask on the first and last slice of every block | ||||||
|  | 	mask = where(domaincoor==Integer(0),zi,mask); | ||||||
|  | 	mask = where(domaincoor==Integer(B-1),zi,mask); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     if ( !domain ) | ||||||
|  |       f = where(mask==Integer(1),f,zz); | ||||||
|  |     else  | ||||||
|  |       f = where(mask==Integer(0),f,zz); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   // Project U in place for DDHMC: zero the block boundary layers, then damp the | ||||||
|  |   // layers next to them by increasing powers of 'factor' towards the boundary. | ||||||
|  |   // A direction mu is cut only when 0 != Block[mu] <= global extent. | ||||||
|  |   // | ||||||
|  |   // FIX: every loop now recomputes coor for its own mu. Previously only the | ||||||
|  |   // first loop called LatticeCoordinate, so the damping loops tested the | ||||||
|  |   // coordinate of whichever direction had been processed last. | ||||||
|  |   // | ||||||
|  |   // NOTE(review): in the damping loops U_mu is peeked before the "in the plane" | ||||||
|  |   // writes and poked back afterwards, so those writes persist only for the | ||||||
|  |   // link components other than mu. | ||||||
|  |   template<class GaugeField> | ||||||
|  |   void ProjectDDHMC(GaugeField &U) | ||||||
|  |   { | ||||||
|  |     GridBase *grid = U.Grid(); | ||||||
|  |     Coordinate Global=grid->GlobalDimensions(); | ||||||
|  |     GaugeField zzz(grid); zzz = Zero(); | ||||||
|  |     LatticeInteger coor(grid);  | ||||||
|  |  | ||||||
|  |     GaugeField Uorg(grid); Uorg = U; | ||||||
|  |      | ||||||
|  |     auto zzz_mu = PeekIndex<LorentzIndex>(zzz,0); | ||||||
|  |     //////////////////////////////////////////////////// | ||||||
|  |     // Zero BDY layers | ||||||
|  |     //////////////////////////////////////////////////// | ||||||
|  |     for(int mu=0;mu<Nd;mu++) { | ||||||
|  |       Integer B1 = Block[mu]; | ||||||
|  |       if ( B1 && (B1 <= Global[mu]) ) { | ||||||
|  | 	LatticeCoordinate(coor,mu); | ||||||
|  |  | ||||||
|  | 	//////////////////////////////// | ||||||
|  | 	// OmegaBar - zero all links contained in slice B-1,0 and | ||||||
|  | 	// mu links connecting to Omega | ||||||
|  | 	//////////////////////////////// | ||||||
|  |  | ||||||
|  | 	U    = where(mod(coor,B1)==Integer(B1-1),zzz,U); | ||||||
|  | 	U    = where(mod(coor,B1)==Integer(0)   ,zzz,U);  | ||||||
|  |  | ||||||
|  | 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(B1-2),zzz_mu,U_mu);  | ||||||
|  | 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||||
|  |  | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     | ||||||
|  |     //////////////////////////////////////////// | ||||||
|  |     // Omega interior slow the evolution | ||||||
|  |     // Tricky as we need to take the smallest of values imposed by each cut | ||||||
|  |     // Do them in order of largest to smallest and smallest writes last | ||||||
|  |     //////////////////////////////////////////// | ||||||
|  |     RealD f= factor; | ||||||
|  | #if 0     | ||||||
|  |     for(int mu=0;mu<Nd;mu++) { | ||||||
|  |       Integer B1 = Block[mu]; | ||||||
|  |       if ( B1 && (B1 <= Global[mu]) ) { | ||||||
|  | 	LatticeCoordinate(coor,mu); | ||||||
|  |  | ||||||
|  | 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||||
|  | 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); | ||||||
|  | 	// In the plane | ||||||
|  | 	U = where(mod(coor,B1)==Integer(B1-5),Uorg*f,U);  | ||||||
|  | 	U = where(mod(coor,B1)==Integer(4)   ,Uorg*f,U);  | ||||||
|  |  | ||||||
|  | 	// Perp links | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(B1-6),Uorg_mu*f,U_mu); | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(4)   ,Uorg_mu*f,U_mu); | ||||||
|  |  | ||||||
|  | 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  | #endif | ||||||
|  |     // Layer damped by f^2 | ||||||
|  |     for(int mu=0;mu<Nd;mu++) { | ||||||
|  |       Integer B1 = Block[mu]; | ||||||
|  |       if ( B1 && (B1 <= Global[mu]) ) { | ||||||
|  | 	LatticeCoordinate(coor,mu); | ||||||
|  |  | ||||||
|  | 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||||
|  | 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); | ||||||
|  | 	// In the plane | ||||||
|  | 	U = where(mod(coor,B1)==Integer(B1-4),Uorg*f*f,U);  | ||||||
|  | 	U = where(mod(coor,B1)==Integer(3)   ,Uorg*f*f,U);  | ||||||
|  |  | ||||||
|  | 	// Perp links | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(B1-5),Uorg_mu*f*f,U_mu); | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(3)   ,Uorg_mu*f*f,U_mu); | ||||||
|  |  | ||||||
|  | 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     // Layer damped by f^3 | ||||||
|  |     for(int mu=0;mu<Nd;mu++) { | ||||||
|  |       Integer B1 = Block[mu]; | ||||||
|  |       if ( B1 && (B1 <= Global[mu]) ) { | ||||||
|  | 	LatticeCoordinate(coor,mu); | ||||||
|  |  | ||||||
|  | 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||||
|  | 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); | ||||||
|  | 	// In the plane | ||||||
|  | 	U = where(mod(coor,B1)==Integer(B1-3),Uorg*f*f*f,U);  | ||||||
|  | 	U = where(mod(coor,B1)==Integer(2)   ,Uorg*f*f*f,U);  | ||||||
|  |  | ||||||
|  | 	// Perp links | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(B1-4),Uorg_mu*f*f*f,U_mu); | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(2)   ,Uorg_mu*f*f*f,U_mu); | ||||||
|  |  | ||||||
|  | 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     // Innermost boundary-adjacent layer: in-plane links zeroed, perp links damped by f^4 | ||||||
|  |     for(int mu=0;mu<Nd;mu++) { | ||||||
|  |       Integer B1 = Block[mu]; | ||||||
|  |       if ( B1 && (B1 <= Global[mu]) ) { | ||||||
|  | 	LatticeCoordinate(coor,mu); | ||||||
|  |  | ||||||
|  | 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||||
|  | 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); | ||||||
|  | 	// In the plane | ||||||
|  | 	U = where(mod(coor,B1)==Integer(B1-2),zzz,U);  | ||||||
|  | 	U = where(mod(coor,B1)==Integer(1)   ,zzz,U);  | ||||||
|  |  | ||||||
|  | 	// Perp links | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(B1-3),Uorg_mu*f*f*f*f,U_mu); | ||||||
|  | 	U_mu = where(mod(coor,B1)==Integer(1)   ,Uorg_mu*f*f*f*f,U_mu); | ||||||
|  |  | ||||||
|  | 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
| @@ -2,14 +2,11 @@ | |||||||
| 
 | 
 | ||||||
| Grid physics library, www.github.com/paboyle/Grid | Grid physics library, www.github.com/paboyle/Grid | ||||||
| 
 | 
 | ||||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.cc | Source file: ./lib/qcd/action/domains/Domains.h | ||||||
| 
 | 
 | ||||||
| Copyright (C) 2015, 2020 | Copyright (C) 2021 | ||||||
| 
 | 
 | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> |  | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Nils Meyer <nils.meyer@ur.de> Regensburg University |  | ||||||
| 
 | 
 | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -28,24 +25,15 @@ with this program; if not, write to the Free Software Foundation, Inc., | |||||||
| See the full license in the file "LICENSE" in the top level distribution | See the full license in the file "LICENSE" in the top level distribution | ||||||
| directory | directory | ||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | 			   /*  END LEGAL */ | ||||||
| #include <Grid/qcd/action/fermion/FermionCore.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> |  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> |  | ||||||
| 
 | 
 | ||||||
| #ifndef AVX512 | ////////////////////////////////////////////////////
 | ||||||
| #ifndef QPX | // Domain decomposition and momentum filter headers
 | ||||||
| #ifndef A64FX | ////////////////////////////////////////////////////
 | ||||||
| #ifndef A64FXFIXEDSIZE | #pragma once  | ||||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| #endif |  | ||||||
| 
 | 
 | ||||||
| NAMESPACE_BEGIN(Grid); | #include <Grid/qcd/action/domains/DomainDecomposition.h> | ||||||
|  | #include <Grid/qcd/action/domains/MomentumFilter.h> | ||||||
|  | #include <Grid/qcd/action/domains/DirichletFilter.h> | ||||||
|  | #include <Grid/qcd/action/domains/DDHMCFilter.h> | ||||||
| 
 | 
 | ||||||
| #include "impl.h" |  | ||||||
| template class WilsonKernels<IMPLEMENTATION>; |  | ||||||
| 
 |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -28,8 +28,7 @@ directory | |||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| /*  END LEGAL */ | /*  END LEGAL */ | ||||||
| //--------------------------------------------------------------------
 | //--------------------------------------------------------------------
 | ||||||
| #ifndef MOMENTUM_FILTER | #pragma once  | ||||||
| #define MOMENTUM_FILTER |  | ||||||
| 
 | 
 | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
| 
 | 
 | ||||||
| @@ -37,7 +36,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
| 
 | 
 | ||||||
| template<typename MomentaField> | template<typename MomentaField> | ||||||
| struct MomentumFilterBase{ | struct MomentumFilterBase{ | ||||||
|   virtual void applyFilter(MomentaField &P) const; |   virtual void applyFilter(MomentaField &P) const = 0; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| //Do nothing
 | //Do nothing
 | ||||||
| @@ -90,5 +89,3 @@ struct MomentumFilterApplyPhase: public MomentumFilterBase<MomentaField>{ | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
| 
 |  | ||||||
| #endif |  | ||||||
| @@ -60,6 +60,8 @@ public: | |||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   virtual void Dminus(const FermionField &psi, FermionField &chi); |   virtual void Dminus(const FermionField &psi, FermionField &chi); | ||||||
|   virtual void DminusDag(const FermionField &psi, FermionField &chi); |   virtual void DminusDag(const FermionField &psi, FermionField &chi); | ||||||
|  |   virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported); | ||||||
|  |   virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported); | ||||||
|   virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); |   virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); | ||||||
|   virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d); |   virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d); | ||||||
|   virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); |   virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); | ||||||
|   | |||||||
							
								
								
									
										185
									
								
								Grid/qcd/action/fermion/DirichletFermionOperator.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										185
									
								
								Grid/qcd/action/fermion/DirichletFermionOperator.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,185 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/qcd/action/fermion/DirichletFermionOperator.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2021 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | //////////////////////////////////////////////////////////////// | ||||||
|  | // Wrap a fermion operator in Dirichlet BC's at node boundary | ||||||
|  | //////////////////////////////////////////////////////////////// | ||||||
|  |      | ||||||
|  | template<class Impl> | ||||||
|  | class DirichletFermionOperator : public FermionOperator<Impl> | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |  | ||||||
|  |   INHERIT_IMPL_TYPES(Impl); | ||||||
|  |  | ||||||
|  |   // Data members | ||||||
|  |   int CommsMode; | ||||||
|  |   Coordinate Block; | ||||||
|  |   DirichletFilter<GaugeField> Filter; | ||||||
|  |   FermionOperator<Impl> & FermOp; | ||||||
|  |    | ||||||
|  |   // Constructor / bespoke | ||||||
|  |   DirichletFermionOperator(FermionOperator<Impl> & _FermOp, Coordinate &_Block) | ||||||
|  |     : FermOp(_FermOp), Block(_Block), Filter(Block) | ||||||
|  |   { | ||||||
|  |     // Save what the comms mode should be under normal BCs | ||||||
|  |     CommsMode = WilsonKernelsStatic::Comms; | ||||||
|  |     assert((CommsMode == WilsonKernelsStatic::CommsAndCompute) | ||||||
|  |          ||(CommsMode == WilsonKernelsStatic::CommsThenCompute)); | ||||||
|  |  | ||||||
|  |     // Check the block size divides local lattice | ||||||
|  |     GridBase *grid = FermOp.GaugeGrid(); | ||||||
|  |  | ||||||
|  |     int blocks_per_rank = 1; | ||||||
|  |     Coordinate LocalDims = grid->LocalDimensions(); | ||||||
|  |     Coordinate GlobalDims= grid->GlobalDimensions(); | ||||||
|  |     assert(Block.size()==LocalDims.size()); | ||||||
|  |  | ||||||
|  |     for(int d=0;d<LocalDims.size();d++){ | ||||||
|  |       if (Block[d]&&(Block[d]<=GlobalDims[d])){ | ||||||
|  | 	int r = LocalDims[d] % Block[d]; | ||||||
|  | 	assert(r == 0); | ||||||
|  | 	blocks_per_rank *= (LocalDims[d] / Block[d]); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     // An even number of blocks per node is the intended requirement (relaxing it would use hardware inefficiently, leaving idle nodes in boundary operator R); the assert below only enforces a non-zero block count | ||||||
|  |     assert( blocks_per_rank != 0); | ||||||
|  |  | ||||||
|  |     // Possible checks that SIMD lanes are used with full occupancy??? | ||||||
|  |   }; | ||||||
|  |   virtual ~DirichletFermionOperator(void) = default; | ||||||
|  |  | ||||||
|  |   void DirichletOn(void)   { | ||||||
|  |     assert(WilsonKernelsStatic::Comms!= WilsonKernelsStatic::CommsDirichlet); | ||||||
|  |     //    WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsDirichlet; | ||||||
|  |   } | ||||||
|  |   void DirichletOff(void)  { | ||||||
|  |     //    assert(WilsonKernelsStatic::Comms== WilsonKernelsStatic::CommsDirichlet); | ||||||
|  |     //    WilsonKernelsStatic::Comms = CommsMode; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Implement the full interface | ||||||
|  |   virtual FermionField &tmp(void) { return FermOp.tmp(); }; | ||||||
|  |  | ||||||
|  |   virtual GridBase *FermionGrid(void)         { return FermOp.FermionGrid(); } | ||||||
|  |   virtual GridBase *FermionRedBlackGrid(void) { return FermOp.FermionRedBlackGrid(); } | ||||||
|  |   virtual GridBase *GaugeGrid(void)           { return FermOp.GaugeGrid(); } | ||||||
|  |   virtual GridBase *GaugeRedBlackGrid(void)   { return FermOp.GaugeRedBlackGrid(); } | ||||||
|  |    | ||||||
|  |   // override multiply | ||||||
|  |   virtual void  M    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.M(in,out);    DirichletOff();  }; | ||||||
|  |   virtual void  Mdag (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Mdag(in,out); DirichletOff();  }; | ||||||
|  |  | ||||||
|  |   // half checkerboard operations | ||||||
|  |   virtual void   Meooe       (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Meooe(in,out);    DirichletOff(); };   | ||||||
|  |   virtual void   MeooeDag    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MeooeDag(in,out); DirichletOff(); }; | ||||||
|  |   virtual void   Mooee       (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Mooee(in,out);    DirichletOff(); }; | ||||||
|  |   virtual void   MooeeDag    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeDag(in,out); DirichletOff(); }; | ||||||
|  |   virtual void   MooeeInv    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeInv(in,out); DirichletOff(); }; | ||||||
|  |   virtual void   MooeeInvDag (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeInvDag(in,out); DirichletOff(); }; | ||||||
|  |  | ||||||
|  |   // non-hermitian hopping term; half cb or both | ||||||
|  |   virtual void Dhop  (const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.Dhop(in,out,dag);    DirichletOff(); }; | ||||||
|  |   virtual void DhopOE(const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.DhopOE(in,out,dag);  DirichletOff(); }; | ||||||
|  |   virtual void DhopEO(const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.DhopEO(in,out,dag);  DirichletOff(); }; | ||||||
|  |   virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp) { DirichletOn(); FermOp.DhopDir(in,out,dir,disp);  DirichletOff(); }; | ||||||
|  |  | ||||||
|  |   // force terms; five routines; default to Dhop on diagonal | ||||||
|  |   virtual void MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MDeriv(mat,U,V,dag);}; | ||||||
|  |   virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MoeDeriv(mat,U,V,dag);}; | ||||||
|  |   virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MeoDeriv(mat,U,V,dag);}; | ||||||
|  |   virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MooDeriv(mat,U,V,dag);}; | ||||||
|  |   virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MeeDeriv(mat,U,V,dag);}; | ||||||
|  |  | ||||||
|  |   virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDeriv(mat,U,V,dag);}; | ||||||
|  |   virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDerivEO(mat,U,V,dag);}; | ||||||
|  |   virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDerivOE(mat,U,V,dag);}; | ||||||
|  |  | ||||||
|  |   virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);}; | ||||||
|  |   virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp){FermOp.Mdir(in,out,dir,disp);}; | ||||||
|  |   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)    {FermOp.MdirAll(in,out);}; | ||||||
|  |  | ||||||
|  |   /////////////////////////////////////////////// | ||||||
|  |   // Updates gauge field during HMC | ||||||
|  |   /////////////////////////////////////////////// | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeField(void){ return FermOp.GetDoubledGaugeField(); }; | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return FermOp.GetDoubledGaugeFieldE(); }; | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return FermOp.GetDoubledGaugeFieldO(); }; | ||||||
|  |   virtual void ImportGauge(const GaugeField & _U) | ||||||
|  |   { | ||||||
|  |     GaugeField U = _U; | ||||||
|  |     // Filter gauge field to apply Dirichlet | ||||||
|  |     Filter.applyFilter(U); | ||||||
|  |     FermOp.ImportGauge(U); | ||||||
|  |   } | ||||||
|  |   /////////////////////////////////////////////// | ||||||
|  |   // Physical field import/export | ||||||
|  |   /////////////////////////////////////////////// | ||||||
|  |   virtual void Dminus(const FermionField &psi, FermionField &chi)    { FermOp.Dminus(psi,chi); } | ||||||
|  |   virtual void DminusDag(const FermionField &psi, FermionField &chi) { FermOp.DminusDag(psi,chi); } | ||||||
|  |   virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported)   { FermOp.ImportFourDimPseudoFermion(input,imported);} | ||||||
|  |   virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported){ FermOp.ExportFourDimPseudoFermion(solution,exported);} | ||||||
|  |   virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported)  { FermOp.ImportPhysicalFermionSource(input,imported);} | ||||||
|  |   virtual void ImportUnphysicalFermion(const FermionField &input,FermionField &imported)      { FermOp.ImportUnphysicalFermion(input,imported);} | ||||||
|  |   virtual void ExportPhysicalFermionSolution(const FermionField &solution,FermionField &exported) {FermOp.ExportPhysicalFermionSolution(solution,exported);} | ||||||
|  |   virtual void ExportPhysicalFermionSource(const FermionField &solution,FermionField &exported)   {FermOp.ExportPhysicalFermionSource(solution,exported);} | ||||||
|  |   ////////////////////////////////////////////////////////////////////// | ||||||
|  |   // Should never be used | ||||||
|  |   ////////////////////////////////////////////////////////////////////// | ||||||
|  |   virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; | ||||||
|  |   virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {assert(0);} | ||||||
|  |   virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) { assert(0);} | ||||||
|  |   virtual void ContractConservedCurrent(PropagatorField &q_in_1, | ||||||
|  | 					PropagatorField &q_in_2, | ||||||
|  | 					PropagatorField &q_out, | ||||||
|  | 					PropagatorField &phys_src, | ||||||
|  | 					Current curr_type, | ||||||
|  | 					unsigned int mu) | ||||||
|  |   {assert(0);}; | ||||||
|  |   virtual void SeqConservedCurrent(PropagatorField &q_in,  | ||||||
|  | 				   PropagatorField &q_out, | ||||||
|  | 				   PropagatorField &phys_src, | ||||||
|  | 				   Current curr_type, | ||||||
|  | 				   unsigned int mu, | ||||||
|  | 				   unsigned int tmin,  | ||||||
|  | 				   unsigned int tmax, | ||||||
|  | 				   ComplexField &lattice_cmplx) | ||||||
|  |   {assert(0);}; | ||||||
|  |       // Only reimplemented in Wilson5D  | ||||||
|  |       // Default to just a zero correlation function | ||||||
|  |   virtual void ContractJ5q(FermionField &q_in   ,ComplexField &J5q) { J5q=Zero(); }; | ||||||
|  |   virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=Zero(); }; | ||||||
|  |    | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
| @@ -101,6 +101,12 @@ NAMESPACE_CHECK(WilsonTM5); | |||||||
| #include <Grid/qcd/action/fermion/PauliVillarsInverters.h> | #include <Grid/qcd/action/fermion/PauliVillarsInverters.h> | ||||||
| #include <Grid/qcd/action/fermion/Reconstruct5Dprop.h> | #include <Grid/qcd/action/fermion/Reconstruct5Dprop.h> | ||||||
| #include <Grid/qcd/action/fermion/MADWF.h> | #include <Grid/qcd/action/fermion/MADWF.h> | ||||||
|  | //////////////////////////////////////////////////////////////////// | ||||||
|  | // DDHMC related  | ||||||
|  | //////////////////////////////////////////////////////////////////// | ||||||
|  | #include <Grid/qcd/action/fermion/DirichletFermionOperator.h> | ||||||
|  | #include <Grid/qcd/action/fermion/SchurFactoredFermionOperator.h> | ||||||
|  |  | ||||||
| NAMESPACE_CHECK(DWFutils); | NAMESPACE_CHECK(DWFutils); | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -115,9 +121,9 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR; | |||||||
| typedef WilsonFermion<WilsonImplF> WilsonFermionF; | typedef WilsonFermion<WilsonImplF> WilsonFermionF; | ||||||
| typedef WilsonFermion<WilsonImplD> WilsonFermionD; | typedef WilsonFermion<WilsonImplD> WilsonFermionD; | ||||||
|  |  | ||||||
| typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; | //typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; | ||||||
| typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; | //typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; | ||||||
| typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; | //typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; | ||||||
|  |  | ||||||
| typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; | typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; | ||||||
| typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; | typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; | ||||||
| @@ -158,41 +164,41 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR; | |||||||
| typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; | typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; | ||||||
| typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; | typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; | ||||||
|  |  | ||||||
| typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; | //typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; | ||||||
| typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; | //typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; | ||||||
| typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; | //typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; | typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; | typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; | typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; | //typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; | //typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; | //typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusFermion<WilsonImplR> MobiusFermionR; | typedef MobiusFermion<WilsonImplR> MobiusFermionR; | ||||||
| typedef MobiusFermion<WilsonImplF> MobiusFermionF; | typedef MobiusFermion<WilsonImplF> MobiusFermionF; | ||||||
| typedef MobiusFermion<WilsonImplD> MobiusFermionD; | typedef MobiusFermion<WilsonImplD> MobiusFermionD; | ||||||
|  |  | ||||||
| typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; | //typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; | ||||||
| typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; | //typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; | ||||||
| typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; | //typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; | typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; | ||||||
| typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; | typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; | ||||||
| typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; | typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; | //typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; | ||||||
| typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; | //typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; | ||||||
| typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; | //typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; | typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; | ||||||
| typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; | typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; | ||||||
| typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; | typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; | ||||||
|  |  | ||||||
| typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; | //typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; | ||||||
| typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; | //typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; | ||||||
| typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; | //typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; | ||||||
|  |  | ||||||
| // Ls vectorised | // Ls vectorised | ||||||
| typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; | typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; | ||||||
| @@ -235,49 +241,49 @@ typedef WilsonFermion<GparityWilsonImplR>     GparityWilsonFermionR; | |||||||
| typedef WilsonFermion<GparityWilsonImplF>     GparityWilsonFermionF; | typedef WilsonFermion<GparityWilsonImplF>     GparityWilsonFermionF; | ||||||
| typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD; | typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD; | ||||||
|  |  | ||||||
| typedef WilsonFermion<GparityWilsonImplRL>     GparityWilsonFermionRL; | //typedef WilsonFermion<GparityWilsonImplRL>     GparityWilsonFermionRL; | ||||||
| typedef WilsonFermion<GparityWilsonImplFH>     GparityWilsonFermionFH; | //typedef WilsonFermion<GparityWilsonImplFH>     GparityWilsonFermionFH; | ||||||
| typedef WilsonFermion<GparityWilsonImplDF>     GparityWilsonFermionDF; | //typedef WilsonFermion<GparityWilsonImplDF>     GparityWilsonFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; | typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; | ||||||
| typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; | typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; | ||||||
| typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; | typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; | ||||||
|  |  | ||||||
| typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; | //typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; | ||||||
| typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; | //typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; | ||||||
| typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; | //typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; | typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; | typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; | typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; | //typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; | //typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; | //typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; | typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; | typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; | typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; | ||||||
|  |  | ||||||
| typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; | //typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; | //typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; | //typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; | typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; | ||||||
| typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; | typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; | ||||||
| typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; | typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; | ||||||
|  |  | ||||||
| typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; | //typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; | ||||||
| typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; | //typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; | ||||||
| typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; | //typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; | typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; | typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; | typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; | //typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; | //typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; | //typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; | typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; | ||||||
| typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; | typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; | ||||||
|   | |||||||
| @@ -25,8 +25,7 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | |||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #ifndef  GRID_QCD_FERMION_CORE_H | #pragma once | ||||||
| #define  GRID_QCD_FERMION_CORE_H |  | ||||||
|  |  | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/GridQCDcore.h> | #include <Grid/GridQCDcore.h> | ||||||
| @@ -45,4 +44,3 @@ NAMESPACE_CHECK(FermionOperator); | |||||||
| #include <Grid/qcd/action/fermion/StaggeredKernels.h>        //used by all wilson type fermions | #include <Grid/qcd/action/fermion/StaggeredKernels.h>        //used by all wilson type fermions | ||||||
| NAMESPACE_CHECK(Kernels); | NAMESPACE_CHECK(Kernels); | ||||||
|  |  | ||||||
| #endif |  | ||||||
|   | |||||||
| @@ -140,6 +140,9 @@ public: | |||||||
|   // Updates gauge field during HMC |   // Updates gauge field during HMC | ||||||
|   /////////////////////////////////////////////// |   /////////////////////////////////////////////// | ||||||
|   virtual void ImportGauge(const GaugeField & _U)=0; |   virtual void ImportGauge(const GaugeField & _U)=0; | ||||||
|  |   virtual DoubledGaugeField &GetDoubledGaugeField(void)  =0; | ||||||
|  |   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void)  =0; | ||||||
|  |   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void)  =0; | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////////////// |   ////////////////////////////////////////////////////////////////////// | ||||||
|   // Conserved currents, either contract at sink or insert sequentially. |   // Conserved currents, either contract at sink or insert sequentially. | ||||||
| @@ -171,6 +174,16 @@ public: | |||||||
|       /////////////////////////////////////////////// |       /////////////////////////////////////////////// | ||||||
|       virtual void Dminus(const FermionField &psi, FermionField &chi)    { chi=psi; } |       virtual void Dminus(const FermionField &psi, FermionField &chi)    { chi=psi; } | ||||||
|       virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; } |       virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; } | ||||||
|  |  | ||||||
|  |       virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported) | ||||||
|  |       { | ||||||
|  | 	imported = input; | ||||||
|  |       }; | ||||||
|  |       virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported) | ||||||
|  |       { | ||||||
|  | 	exported=solution; | ||||||
|  |       }; | ||||||
|  |  | ||||||
|       virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported) |       virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported) | ||||||
|       { |       { | ||||||
| 	imported = input; | 	imported = input; | ||||||
|   | |||||||
| @@ -30,6 +30,18 @@ directory | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   Policy implementation for G-parity boundary conditions | ||||||
|  |  | ||||||
|  |   Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular | ||||||
|  |   field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first | ||||||
|  |   with the regular links we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field | ||||||
|  |   to avoid communicating links when applying the Dirac operator, such that the double-stored field also contains a flavor index which maps to | ||||||
|  |   either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor. | ||||||
|  |  | ||||||
|  |   Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  | ||||||
|  |   mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs | ||||||
|  |  */ | ||||||
| template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal> | template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal> | ||||||
| class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > { | class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > { | ||||||
| public: | public: | ||||||
| @@ -113,7 +125,7 @@ public: | |||||||
|     || ((distance== 1)&&(icoor[direction]==1)) |     || ((distance== 1)&&(icoor[direction]==1)) | ||||||
|     || ((distance==-1)&&(icoor[direction]==0)); |     || ((distance==-1)&&(icoor[direction]==0)); | ||||||
|  |  | ||||||
|     permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world |     permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction | ||||||
|  |  | ||||||
|     //Apply the links |     //Apply the links | ||||||
|     int f_upper = permute_lane ? 1 : 0; |     int f_upper = permute_lane ? 1 : 0; | ||||||
| @@ -139,10 +151,10 @@ public: | |||||||
|     assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code |     assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code | ||||||
|     assert((sl == 1) || (sl == 2)); |     assert((sl == 1) || (sl == 2)); | ||||||
|  |  | ||||||
|     if ( SE->_around_the_world && St.parameters.twists[mmu] ) { |     //If this site is a global boundary site, perform the G-parity flavor twist | ||||||
|  |     if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) { | ||||||
|       if ( sl == 2 ) { |       if ( sl == 2 ) { | ||||||
|         | 	//Only do the twist for lanes on the edge of the physical node | ||||||
| 	ExtractBuffer<sobj> vals(Nsimd); | 	ExtractBuffer<sobj> vals(Nsimd); | ||||||
|  |  | ||||||
| 	extract(chi,vals); | 	extract(chi,vals); | ||||||
| @@ -197,6 +209,19 @@ public: | |||||||
|     reg = memory; |     reg = memory; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   //Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds | ||||||
|  |   inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){ | ||||||
|  |     autoView(poke_f0_v, poke_f0, CpuRead); | ||||||
|  |     autoView(poke_f1_v, poke_f1, CpuRead); | ||||||
|  |     autoView(Uds_v, Uds, CpuWrite); | ||||||
|  |     thread_foreach(ss,poke_f0_v,{ | ||||||
|  | 	Uds_v[ss](0)(mu) = poke_f0_v[ss](); | ||||||
|  | 	Uds_v[ss](1)(mu) = poke_f1_v[ss](); | ||||||
|  |       }); | ||||||
|  |   } | ||||||
|  |      | ||||||
|  |  | ||||||
|   inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) |   inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) | ||||||
|   { |   { | ||||||
|     conformable(Uds.Grid(),GaugeGrid); |     conformable(Uds.Grid(),GaugeGrid); | ||||||
| @@ -208,13 +233,18 @@ public: | |||||||
|     |     | ||||||
|     Lattice<iScalar<vInteger> > coor(GaugeGrid); |     Lattice<iScalar<vInteger> > coor(GaugeGrid); | ||||||
|  |  | ||||||
|     for(int mu=0;mu<Nd;mu++){ |     //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  | ||||||
|  |     //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs         | ||||||
|  |     for(int mu=0;mu<Nd-1;mu++){ | ||||||
|  |  | ||||||
|  |       if( Params.twists[mu] ){ | ||||||
| 	LatticeCoordinate(coor,mu); | 	LatticeCoordinate(coor,mu); | ||||||
|  |       } | ||||||
|            |            | ||||||
|       U     = PeekIndex<LorentzIndex>(Umu,mu); |       U     = PeekIndex<LorentzIndex>(Umu,mu); | ||||||
|       Uconj = conjugate(U); |       Uconj = conjugate(U); | ||||||
|       |       | ||||||
|  |       // Implement the isospin rotation sign on the boundary between f=1 and f=0 | ||||||
|       // This phase could come from a simple bc 1,1,-1,1 .. |       // This phase could come from a simple bc 1,1,-1,1 .. | ||||||
|       int neglink = GaugeGrid->GlobalDimensions()[mu]-1; |       int neglink = GaugeGrid->GlobalDimensions()[mu]-1; | ||||||
|       if ( Params.twists[mu] ) {  |       if ( Params.twists[mu] ) {  | ||||||
| @@ -260,6 +290,38 @@ public: | |||||||
|         }); |         }); | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     { //periodic / antiperiodic temporal BCs | ||||||
|  |       int mu = Nd-1; | ||||||
|  |       int L   = GaugeGrid->GlobalDimensions()[mu]; | ||||||
|  |       int Lmu = L - 1; | ||||||
|  |  | ||||||
|  |       LatticeCoordinate(coor, mu); | ||||||
|  |  | ||||||
|  |       U = PeekIndex<LorentzIndex>(Umu, mu); //Get t-directed links | ||||||
|  |        | ||||||
|  |       GaugeLinkField *Upoke = &U; | ||||||
|  |  | ||||||
|  |       if(Params.twists[mu]){ //antiperiodic | ||||||
|  | 	Utmp =  where(coor == Lmu, -U, U); | ||||||
|  | 	Upoke = &Utmp; | ||||||
|  |       } | ||||||
|  |      | ||||||
|  |       Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links       | ||||||
|  |       pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu); | ||||||
|  |  | ||||||
|  |       //Get the barrel-shifted field | ||||||
|  |       Utmp = adj(Cshift(U, mu, -1)); //is a forward shift! | ||||||
|  |       Upoke = &Utmp; | ||||||
|  |  | ||||||
|  |       if(Params.twists[mu]){ | ||||||
|  | 	U = where(coor == 0, -Utmp, Utmp);  //boundary phase | ||||||
|  | 	Upoke = &U; | ||||||
|  |       } | ||||||
|  |        | ||||||
|  |       Uconj = conjugate(*Upoke); | ||||||
|  |       pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4); | ||||||
|  |     } | ||||||
|   } |   } | ||||||
|        |        | ||||||
|   inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) { |   inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) { | ||||||
| @@ -300,35 +362,55 @@ public: | |||||||
|   } |   } | ||||||
|   |   | ||||||
|   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { |   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { | ||||||
|  |     int Ls=Btilde.Grid()->_fdimensions[0]; | ||||||
|      |      | ||||||
|     int Ls = Btilde.Grid()->_fdimensions[0]; |  | ||||||
|          |  | ||||||
|     GaugeLinkField tmp(mat.Grid()); |  | ||||||
|     tmp = Zero(); |  | ||||||
|     { |     { | ||||||
|       autoView( tmp_v , tmp, CpuWrite); |       GridBase *GaugeGrid = mat.Grid(); | ||||||
|       autoView( Atilde_v , Atilde, CpuRead); |       Lattice<iScalar<vInteger> > coor(GaugeGrid); | ||||||
|       autoView( Btilde_v , Btilde, CpuRead); |  | ||||||
|       thread_for(ss,tmp.Grid()->oSites(),{ |       if( Params.twists[mu] ){ | ||||||
| 	  for (int s = 0; s < Ls; s++) { | 	LatticeCoordinate(coor,mu); | ||||||
| 	    int sF = s + Ls * ss; |  | ||||||
| 	    auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF])); |  | ||||||
| 	    tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|  |       autoView( mat_v , mat, AcceleratorWrite); | ||||||
|  |       autoView( Btilde_v , Btilde, AcceleratorRead); | ||||||
|  |       autoView( Atilde_v , Atilde, AcceleratorRead); | ||||||
|  |       accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{	   | ||||||
|  |   	  int sU=sss; | ||||||
|  |   	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType; | ||||||
|  |   	  ColorMatrixType sum; | ||||||
|  |   	  zeroit(sum); | ||||||
|  |   	  for(int s=0;s<Ls;s++){ | ||||||
|  |   	    int sF = s+Ls*sU; | ||||||
|  |   	    for(int spn=0;spn<Ns;spn++){ //sum over spin | ||||||
|  | 	      //Flavor 0 | ||||||
|  |   	      auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector | ||||||
|  |   	      auto aa = coalescedRead(Atilde_v[sF](0)(spn) ); | ||||||
|  |   	      sum = sum + outerProduct(bb,aa); | ||||||
|  |  | ||||||
|  |   	      //Flavor 1 | ||||||
|  |   	      bb = coalescedRead(Btilde_v[sF](1)(spn) ); | ||||||
|  |   	      aa = coalescedRead(Atilde_v[sF](1)(spn) ); | ||||||
|  |   	      sum = sum + conjugate(outerProduct(bb,aa)); | ||||||
|  |   	    } | ||||||
|  |   	  }	     | ||||||
|  |   	  coalescedWrite(mat_v[sU](mu)(), sum); | ||||||
|   	}); |   	}); | ||||||
|     } |     } | ||||||
|     PokeIndex<LorentzIndex>(mat, tmp, mu); |  | ||||||
|     return; |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |    | ||||||
|  |  | ||||||
|  |    | ||||||
| }; | }; | ||||||
|  |  | ||||||
| typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR;  // Real.. whichever prec | typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR;  // Real.. whichever prec | ||||||
| typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF;  // Float | typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF;  // Float | ||||||
| typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD;  // Double | typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD;  // Double | ||||||
|   |   | ||||||
| typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL;  // Real.. whichever prec | //typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL;  // Real.. whichever prec | ||||||
| typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH;  // Float | //typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH;  // Float | ||||||
| typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF;  // Double | //typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF;  // Double | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|   | |||||||
| @@ -141,8 +141,11 @@ public: | |||||||
|   void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat); |   void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat); | ||||||
|   void ImportGaugeSimple(const GaugeField &_UUU    ,const GaugeField &_U); |   void ImportGaugeSimple(const GaugeField &_UUU    ,const GaugeField &_U); | ||||||
|   void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); |   void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); | ||||||
|   DoubledGaugeField &GetU(void)   { return Umu ; } ; |   virtual DoubledGaugeField &GetDoubledGaugeField(void)  override { return Umu; }; | ||||||
|   DoubledGaugeField &GetUUU(void) { return UUUmu; }; |   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void) override { return UmuEven; }; | ||||||
|  |   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void) override { return UmuOdd; }; | ||||||
|  |   virtual DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||||
|  |   virtual DoubledGaugeField &GetUUU(void) { return UUUmu; }; | ||||||
|   void CopyGaugeCheckerboards(void); |   void CopyGaugeCheckerboards(void); | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -167,6 +167,9 @@ public: | |||||||
|   void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); |   void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); | ||||||
|   // Give a reference; can be used to do an assignment or copy back out after import |   // Give a reference; can be used to do an assignment or copy back out after import | ||||||
|   // if Carleton wants to cache them and not use the ImportSimple |   // if Carleton wants to cache them and not use the ImportSimple | ||||||
|  |   virtual DoubledGaugeField &GetDoubledGaugeField(void)  override { return Umu; }; | ||||||
|  |   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void) override { return UmuEven; }; | ||||||
|  |   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void) override { return UmuOdd; }; | ||||||
|   DoubledGaugeField &GetU(void)   { return Umu ; } ; |   DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||||
|   DoubledGaugeField &GetUUU(void) { return UUUmu; }; |   DoubledGaugeField &GetUUU(void) { return UUUmu; }; | ||||||
|   void CopyGaugeCheckerboards(void); |   void CopyGaugeCheckerboards(void); | ||||||
|   | |||||||
| @@ -135,6 +135,9 @@ public: | |||||||
|  |  | ||||||
|   // DoubleStore impl dependent |   // DoubleStore impl dependent | ||||||
|   void ImportGauge      (const GaugeField &_U ); |   void ImportGauge      (const GaugeField &_U ); | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; | ||||||
|   DoubledGaugeField &GetU(void)   { return Umu ; } ; |   DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||||
|   void CopyGaugeCheckerboards(void); |   void CopyGaugeCheckerboards(void); | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										534
									
								
								Grid/qcd/action/fermion/SchurFactoredFermionOperator.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										534
									
								
								Grid/qcd/action/fermion/SchurFactoredFermionOperator.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,534 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./lib/qcd/action/fermion/SchurFactoredFermionOperator.h | ||||||
|  |  | ||||||
|  |     Copyright (C) 2021 | ||||||
|  |  | ||||||
|  | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <Grid/qcd/utils/MixedPrecisionOperatorFunction.h> | ||||||
|  | #include <Grid/qcd/action/domains/Domains.h> | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////////////////// | ||||||
|  |   // Some explanation of class structure for domain decomposition: | ||||||
|  |   // | ||||||
|  |   // Need a dirichlet operator for two flavour determinant - acts on both Omega and OmegaBar. | ||||||
|  |   // | ||||||
|  |   // Possible gain if the global sums and CG are run independently?? Could measure this. | ||||||
|  |   // | ||||||
|  |   // Types of operations | ||||||
|  |   // | ||||||
|  |   // 1) assemble local det dOmega det dOmegaBar pseudofermion | ||||||
|  |   // | ||||||
|  |   // - DirichletFermionOperator - can either do a global solve, or independent/per cell coefficients. | ||||||
|  |   // | ||||||
|  |   // 2) assemble dOmegaInverse and dOmegaBarInverse in R | ||||||
|  |   // | ||||||
|  |   // - DirichletFermionOperator - can also be used to  | ||||||
|  |   //                                       - need two or more cells per node. Options | ||||||
|  |   //                                       - a) solve one cell at a time, no new code, CopyRegion and reduced /split Grids | ||||||
|  |   //                                       - b) solve multiple cells in parallel. predicated dslash implementation | ||||||
|  |   // | ||||||
|  |   //                                       - b) has more parallelism; experience with block solvers suggests it might not be algorithmically inefficient | ||||||
|  |   //                                         a) has more cache friendly and easier code. | ||||||
|  |   //                                         b) is easy to implement in a "trial" or inefficient code with projection. | ||||||
|  |   // | ||||||
|  |   // 3)  Additional functionality for domain operations | ||||||
|  |   // | ||||||
|  |   // - SchurFactoredFermionOperator  - Need a DDHMC utility - whether used in two flavour or one flavour  | ||||||
|  |   // | ||||||
|  |   // - dBoundary - needs non-dirichlet operator | ||||||
|  |   // - Contains one Dirichlet Op, and one non-Dirichlet op. Implements dBoundary etc... | ||||||
|  |   // - The Dirichlet ops can be passed to dOmega(Bar) solvers etc... | ||||||
|  |   // | ||||||
|  |   //////////////////////////////////////////////////////// | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class ImplD,class ImplF> | ||||||
|  | class SchurFactoredFermionOperator : public ImplD | ||||||
|  | { | ||||||
|  |   INHERIT_IMPL_TYPES(ImplD); | ||||||
|  |    | ||||||
|  |   typedef typename ImplF::FermionField FermionFieldF; | ||||||
|  |   typedef typename ImplD::FermionField FermionFieldD; | ||||||
|  |  | ||||||
|  |   typedef SchurDiagMooeeOperator<FermionOperator<ImplD>,FermionFieldD> LinearOperatorD; | ||||||
|  |   typedef SchurDiagMooeeOperator<FermionOperator<ImplF>,FermionFieldF> LinearOperatorF; | ||||||
|  |   typedef SchurDiagMooeeDagOperator<FermionOperator<ImplD>,FermionFieldD> LinearOperatorDagD; | ||||||
|  |   typedef SchurDiagMooeeDagOperator<FermionOperator<ImplF>,FermionFieldF> LinearOperatorDagF; | ||||||
|  |  | ||||||
|  |   typedef MixedPrecisionConjugateGradientOperatorFunction<FermionOperator<ImplD>, | ||||||
|  | 							  FermionOperator<ImplF>, | ||||||
|  | 							  LinearOperatorD, | ||||||
|  | 							  LinearOperatorF> MxPCG; | ||||||
|  |  | ||||||
|  |   typedef MixedPrecisionConjugateGradientOperatorFunction<FermionOperator<ImplD>, | ||||||
|  | 							  FermionOperator<ImplF>, | ||||||
|  | 							  LinearOperatorDagD, | ||||||
|  | 							  LinearOperatorDagF> MxDagPCG; | ||||||
|  | public: | ||||||
|  |  | ||||||
|  |   // Fermion grid of this operator, as reported by the double-precision periodic operator. | ||||||
|  |   GridBase *FermionGrid(void) | ||||||
|  |   { | ||||||
|  |     return PeriodicFermOpD.FermionGrid(); | ||||||
|  |   }; | ||||||
|  |   // Gauge grid of this operator, as reported by the double-precision periodic operator. | ||||||
|  |   GridBase *GaugeGrid(void) | ||||||
|  |   { | ||||||
|  |     return PeriodicFermOpD.GaugeGrid(); | ||||||
|  |   }; | ||||||
|  |    | ||||||
|  |   FermionOperator<ImplD> & DirichletFermOpD; | ||||||
|  |   FermionOperator<ImplF> & DirichletFermOpF; | ||||||
|  |   FermionOperator<ImplD> & PeriodicFermOpD;  | ||||||
|  |   FermionOperator<ImplF> & PeriodicFermOpF;  | ||||||
|  |  | ||||||
|  |   LinearOperatorD DirichletLinOpD; | ||||||
|  |   LinearOperatorF DirichletLinOpF; | ||||||
|  |   LinearOperatorD PeriodicLinOpD; | ||||||
|  |   LinearOperatorF PeriodicLinOpF; | ||||||
|  |  | ||||||
|  |   LinearOperatorDagD DirichletLinOpDagD; | ||||||
|  |   LinearOperatorDagF DirichletLinOpDagF; | ||||||
|  |   LinearOperatorDagD PeriodicLinOpDagD; | ||||||
|  |   LinearOperatorDagF PeriodicLinOpDagF; | ||||||
|  |  | ||||||
|  |   // Can tinker with these in the pseudofermion for force vs. action solves | ||||||
|  |   Integer maxinnerit; | ||||||
|  |   Integer maxouterit; | ||||||
|  |   RealD tol; | ||||||
|  |   RealD tolinner; | ||||||
|  |    | ||||||
|  |   Coordinate Block; | ||||||
|  |  | ||||||
|  |   DomainDecomposition Domains; | ||||||
|  |  | ||||||
|  |   // Builds the Schur-factored operator from four externally owned fermion operators. | ||||||
|  |   //   _PeriodicFermOpD / _PeriodicFermOpF   : double / single precision operators bound to PeriodicFermOp* | ||||||
|  |   //   _DirichletFermOpD / _DirichletFermOpF : double / single precision operators bound to DirichletFermOp* | ||||||
|  |   //   _Block                                : block extents, copied and handed to the DomainDecomposition | ||||||
|  |   // The four operators are held by reference, so they must outlive this object. | ||||||
|  |   // | ||||||
|  |   // The initialisers are written in member declaration order, which is the order in which | ||||||
|  |   // C++ runs them regardless of how the list is written: the operator references come | ||||||
|  |   // first, then the Schur wrappers that bind to them, then the solver defaults, and | ||||||
|  |   // finally Block followed by Domains (which is constructed from the member Block). | ||||||
|  |   SchurFactoredFermionOperator(FermionOperator<ImplD>  & _PeriodicFermOpD, | ||||||
|  | 			       FermionOperator<ImplF>  & _PeriodicFermOpF, | ||||||
|  | 			       FermionOperator<ImplD>  & _DirichletFermOpD, | ||||||
|  | 			       FermionOperator<ImplF>  & _DirichletFermOpF, | ||||||
|  | 			       Coordinate &_Block) | ||||||
|  |     : DirichletFermOpD(_DirichletFermOpD), | ||||||
|  |       DirichletFermOpF(_DirichletFermOpF), | ||||||
|  |       PeriodicFermOpD(_PeriodicFermOpD), | ||||||
|  |       PeriodicFermOpF(_PeriodicFermOpF), | ||||||
|  |       DirichletLinOpD(DirichletFermOpD), | ||||||
|  |       DirichletLinOpF(DirichletFermOpF), | ||||||
|  |       PeriodicLinOpD(PeriodicFermOpD), | ||||||
|  |       PeriodicLinOpF(PeriodicFermOpF), | ||||||
|  |       DirichletLinOpDagD(DirichletFermOpD), | ||||||
|  |       DirichletLinOpDagF(DirichletFermOpF), | ||||||
|  |       PeriodicLinOpDagD(PeriodicFermOpD), | ||||||
|  |       PeriodicLinOpDagF(PeriodicFermOpF), | ||||||
|  |       maxinnerit(1000),   // default inner (single precision) iteration cap | ||||||
|  |       maxouterit(10),     // default outer (double precision) restart cap | ||||||
|  |       tol(1.0e-10),       // default outer solver tolerance | ||||||
|  |       tolinner(1.0e-6),   // default inner solver tolerance | ||||||
|  |       Block(_Block), | ||||||
|  |       Domains(Block) | ||||||
|  |   { | ||||||
|  |     // At each precision the periodic and Dirichlet operators must share one fermion grid. | ||||||
|  |     assert(PeriodicFermOpD.FermionGrid() == DirichletFermOpD.FermionGrid()); | ||||||
|  |     assert(PeriodicFermOpF.FermionGrid() == DirichletFermOpF.FermionGrid()); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   enum Domain { Omega=0, OmegaBar=1 }; | ||||||
|  |  | ||||||
|  |   // Loads a gauge configuration into both double-precision operators. | ||||||
|  |   // The periodic operator receives Umu unchanged; the Dirichlet operator receives a copy | ||||||
|  |   // that has been passed through DirichletFilter built from Block. | ||||||
|  |   // Only the double-precision operators are touched here; per the original note the | ||||||
|  |   // single-precision ones are updated inside the mixed-precision CG. | ||||||
|  |   void ImportGauge(const GaugeField &Umu) | ||||||
|  |   { | ||||||
|  |     PeriodicFermOpD.ImportGauge(Umu); | ||||||
|  |  | ||||||
|  |     // Filter a private copy so the caller's field is left as it was. | ||||||
|  |     GaugeField filtered(Umu.Grid()); | ||||||
|  |     filtered = Umu; | ||||||
|  |  | ||||||
|  |     // NOTE(review): presumably removes links crossing block boundaries - confirm in DirichletFilter. | ||||||
|  |     DirichletFilter<GaugeField> BlockFilter(Block); | ||||||
|  |     BlockFilter.applyFilter(filtered); | ||||||
|  |  | ||||||
|  |     DirichletFermOpD.ImportGauge(filtered); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |   void ProjectBoundaryBothDomains (FermionField &f,int sgn) | ||||||
|  |   { | ||||||
|  |     assert((sgn==1)||(sgn==-1)); | ||||||
|  |     Real rsgn = sgn; | ||||||
|  |  | ||||||
|  |     Gamma::Algebra Gmu [] = { | ||||||
|  |       Gamma::Algebra::GammaX, | ||||||
|  |       Gamma::Algebra::GammaY, | ||||||
|  |       Gamma::Algebra::GammaZ, | ||||||
|  |       Gamma::Algebra::GammaT | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     GridBase *grid = f.Grid(); | ||||||
|  |     LatticeInteger  coor(grid); | ||||||
|  |     LatticeInteger  face(grid); | ||||||
|  |     LatticeInteger  one(grid); one = 1; | ||||||
|  |     LatticeInteger  zero(grid); zero = 0; | ||||||
|  |     LatticeInteger nface(grid); nface=Zero(); | ||||||
|  |      | ||||||
|  |     FermionField projected(grid); projected=Zero(); | ||||||
|  |     FermionField sp_proj  (grid); | ||||||
|  |  | ||||||
|  |     int dims = grid->Nd(); | ||||||
|  |     int isDWF= (dims==Nd+1); | ||||||
|  |     assert((dims==Nd)||(dims==Nd+1)); | ||||||
|  |     Coordinate Global=grid->GlobalDimensions(); | ||||||
|  |  | ||||||
|  |     for(int mu=0;mu<Nd;mu++){ | ||||||
|  |  | ||||||
|  |       if ( Block[mu] <= Global[mu+isDWF] ) { | ||||||
|  | 	// need to worry about DWF 5th dim first | ||||||
|  | 	LatticeCoordinate(coor,mu+isDWF);  | ||||||
|  |        | ||||||
|  | 	face = where(mod(coor,Block[mu]) == Integer(0),one,zero ); | ||||||
|  | 	nface = nface + face; | ||||||
|  |  | ||||||
|  | 	Gamma G(Gmu[mu]); | ||||||
|  | 	// Lower face receives (1-gamma)/2 in normal forward hopping term | ||||||
|  | 	sp_proj  = 0.5*(f-G*f*rsgn); | ||||||
|  | 	projected= where(face,sp_proj,projected); | ||||||
|  | 	//projected= where(face,f,projected); | ||||||
|  |        | ||||||
|  | 	face = where(mod(coor,Block[mu]) == Integer(Block[mu]-1) ,one,zero ); | ||||||
|  | 	nface = nface + face; | ||||||
|  |  | ||||||
|  | 	// Upper face receives (1+gamma)/2 in normal backward hopping term | ||||||
|  | 	sp_proj = 0.5*(f+G*f*rsgn); | ||||||
|  | 	projected= where(face,sp_proj,projected); | ||||||
|  | 	//projected= where(face,f,projected); | ||||||
|  |       } | ||||||
|  |        | ||||||
|  |     } | ||||||
|  |     // Initial Zero() where nface==0. | ||||||
|  |     // Keep the spin projected faces where nface==1 | ||||||
|  |     // Full spinor where nface>=2 | ||||||
|  |     projected = where(nface>Integer(1),f,projected); | ||||||
|  |     f=projected; | ||||||
|  |   } | ||||||
|  | */ | ||||||
|  |   // Replaces f by its projection onto sites that touch a domain interface. | ||||||
|  |   //   f   : field projected in place | ||||||
|  |   //   sgn : +1 or -1 (asserted); multiplies the gamma-matrix term of the spin projector | ||||||
|  |   // For every direction whose block size is non-zero and no larger than the global extent, | ||||||
|  |   // a site is flagged as a face when it and its neighbour in that direction carry | ||||||
|  |   // different domain masks. Flagged sites receive 0.5*(f -/+ sgn*G*f); sites flagged more | ||||||
|  |   // than once keep the full spinor; all remaining sites are set to zero. | ||||||
|  |   void ProjectBoundaryBothDomains (FermionField &f,int sgn) | ||||||
|  |   { | ||||||
|  |     assert((sgn==1)||(sgn==-1)); | ||||||
|  |     Real rsgn = sgn; | ||||||
|  |  | ||||||
|  |     Gamma::Algebra Gmu [] = { | ||||||
|  |       Gamma::Algebra::GammaX, | ||||||
|  |       Gamma::Algebra::GammaY, | ||||||
|  |       Gamma::Algebra::GammaZ, | ||||||
|  |       Gamma::Algebra::GammaT | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     GridBase *grid = f.Grid(); | ||||||
|  |     LatticeInteger  face(grid); | ||||||
|  |     LatticeInteger  one(grid);   one = 1; | ||||||
|  |     LatticeInteger  zero(grid); zero = 0; | ||||||
|  |     LatticeInteger  omega(grid); | ||||||
|  |     LatticeInteger  omegabar(grid); | ||||||
|  |     LatticeInteger  tmp(grid); | ||||||
|  |  | ||||||
|  |     // Integer masks: a field of ones restricted to domain 0 and to domain 1 respectively. | ||||||
|  |     omega=one;    Domains.ProjectDomain(omega,0); | ||||||
|  |     omegabar=one; Domains.ProjectDomain(omegabar,1); | ||||||
|  |  | ||||||
|  |     // Number of times each site has been flagged as a face, summed over directions. | ||||||
|  |     LatticeInteger nface(grid); nface=Zero(); | ||||||
|  |  | ||||||
|  |     FermionField projected(grid); projected=Zero(); | ||||||
|  |     FermionField sp_proj  (grid); | ||||||
|  |  | ||||||
|  |     int dims = grid->Nd(); | ||||||
|  |     int isDWF= (dims==Nd+1); | ||||||
|  |     assert((dims==Nd)||(dims==Nd+1)); | ||||||
|  |     Coordinate Global=grid->GlobalDimensions(); | ||||||
|  |  | ||||||
|  |     for(int mmu=0;mmu<Nd;mmu++){ | ||||||
|  |       Gamma G(Gmu[mmu]); | ||||||
|  |  | ||||||
|  |       // need to worry about DWF 5th dim first | ||||||
|  |       int mu = mmu+isDWF; | ||||||
|  |       if ( Block[mmu] && (Block[mmu] <= Global[mu]) ) { | ||||||
|  |  | ||||||
|  |         // Lower face receives (1-gamma)/2 in normal forward hopping term | ||||||
|  |         // The sum equals 2 only where both masks are set, i.e. this site is in one | ||||||
|  |         // domain and the site reached by Cshift(...,mu,-1) is in the other. | ||||||
|  |         tmp = Cshift(omegabar,mu,-1); | ||||||
|  |         tmp = tmp + omega; | ||||||
|  |         face = where(tmp == Integer(2),one,zero ); | ||||||
|  |  | ||||||
|  |         tmp = Cshift(omega,mu,-1); | ||||||
|  |         tmp = tmp + omegabar; | ||||||
|  |         face = where(tmp == Integer(2),one,face ); | ||||||
|  |  | ||||||
|  |         nface = nface + face; | ||||||
|  |  | ||||||
|  |         sp_proj  = 0.5*(f-G*f*rsgn); | ||||||
|  |         projected= where(face,sp_proj,projected); | ||||||
|  |  | ||||||
|  |         // Upper face receives (1+gamma)/2 in normal backward hopping term | ||||||
|  |         // Same test as above with the opposite shift. | ||||||
|  |         tmp = Cshift(omegabar,mu,1); | ||||||
|  |         tmp = tmp + omega; | ||||||
|  |         face = where(tmp == Integer(2),one,zero ); | ||||||
|  |  | ||||||
|  |         tmp = Cshift(omega,mu,1); | ||||||
|  |         tmp = tmp + omegabar; | ||||||
|  |         face = where(tmp == Integer(2),one,face ); | ||||||
|  |  | ||||||
|  |         nface = nface + face; | ||||||
|  |  | ||||||
|  |         sp_proj = 0.5*(f+G*f*rsgn); | ||||||
|  |         projected= where(face,sp_proj,projected); | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |     // Initial Zero() where nface==0. | ||||||
|  |     // Keep the spin projected faces where nface==1 | ||||||
|  |     // Full spinor where nface>=2 | ||||||
|  |     projected = where(nface>Integer(1),f,projected); | ||||||
|  |     f=projected; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Restricts f in place to one domain by delegating to the DomainDecomposition member. | ||||||
|  |   //   domain : domain selector, passed straight through (callers in this class use Omega / OmegaBar) | ||||||
|  |   // NOTE(review): an earlier inline version selected sites where the parity of | ||||||
|  |   // sum_d (coor[d] / Block[d]) equals 'domain' and zeroed the rest; whether | ||||||
|  |   // Domains.ProjectDomain uses exactly that rule is not visible from this class. | ||||||
|  |   void ProjectDomain(FermionField &f,int domain) | ||||||
|  |   { | ||||||
|  |     Domains.ProjectDomain(f,domain); | ||||||
|  |   }; | ||||||
|  |   // Restricts f in place to the OmegaBar domain. | ||||||
|  |   void ProjectOmegaBar   (FermionField &f) | ||||||
|  |   { | ||||||
|  |     ProjectDomain(f,OmegaBar); | ||||||
|  |   } | ||||||
|  |   // Restricts f in place to the Omega domain. | ||||||
|  |   void ProjectOmega      (FermionField &f) | ||||||
|  |   { | ||||||
|  |     ProjectDomain(f,Omega); | ||||||
|  |   } | ||||||
|  |   // See my notes(!). | ||||||
|  |   // Notation: Following Luscher, we introduce projectors $\hPdb$ with both spinor and space structure | ||||||
|  |   // projecting all spinor elements in $\Omega$ connected by $\Ddb$ to $\bar{\Omega}$, | ||||||
|  |   void ProjectBoundaryBar(FermionField &f) | ||||||
|  |   { | ||||||
|  |     // Spin-project onto the interface sites of both domains (sgn = +1) ... | ||||||
|  |     ProjectBoundaryBothDomains(f,1); | ||||||
|  |     // ... then keep only the part that lives in Omega. | ||||||
|  |     ProjectDomain(f,Omega); | ||||||
|  |   } | ||||||
|  |   // and $\hPd$ projecting all spinor elements in $\bar{\Omega}$ connected by $\Dd$ to $\Omega$. | ||||||
|  |   void ProjectBoundary   (FermionField &f) | ||||||
|  |   { | ||||||
|  |     // Spin-project onto the interface sites of both domains (sgn = +1) ... | ||||||
|  |     ProjectBoundaryBothDomains(f,1); | ||||||
|  |     // ... then keep only the part that lives in OmegaBar. | ||||||
|  |     ProjectDomain(f,OmegaBar); | ||||||
|  |     //    DumpSliceNorm("ProjectBoundary",f,f.Grid()->Nd()-1); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   // out = P_Omega * M * P_OmegaBar * in, with M the periodic double-precision operator. | ||||||
|  |   void dBoundary    (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField src(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(src,OmegaBar); | ||||||
|  |     PeriodicFermOpD.M(src,out); | ||||||
|  |     ProjectDomain(out,Omega); | ||||||
|  |   }; | ||||||
|  |   // out = P_OmegaBar * Mdag * P_Omega * in, with Mdag from the periodic double-precision operator. | ||||||
|  |   void dBoundaryDag (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField src(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(src,Omega); | ||||||
|  |     PeriodicFermOpD.Mdag(src,out); | ||||||
|  |     ProjectDomain(out,OmegaBar); | ||||||
|  |   }; | ||||||
|  |   // out = P_OmegaBar * M * P_Omega * in, with M the periodic double-precision operator. | ||||||
|  |   void dBoundaryBar (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField src(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(src,Omega); | ||||||
|  |     PeriodicFermOpD.M(src,out); | ||||||
|  |     ProjectDomain(out,OmegaBar); | ||||||
|  |   }; | ||||||
|  |   // out = P_Omega * Mdag * P_OmegaBar * in, with Mdag from the periodic double-precision operator. | ||||||
|  |   void dBoundaryBarDag (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField src(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(src,OmegaBar); | ||||||
|  |     PeriodicFermOpD.Mdag(src,out); | ||||||
|  |     ProjectDomain(out,Omega); | ||||||
|  |   }; | ||||||
|  |   // out = P_Omega * M * P_Omega * in, with M the Dirichlet double-precision operator. | ||||||
|  |   void dOmega       (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField src(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(src,Omega); | ||||||
|  |     DirichletFermOpD.M(src,out); | ||||||
|  |     ProjectDomain(out,Omega); | ||||||
|  |   }; | ||||||
|  |   // out = P_OmegaBar * M * P_OmegaBar * in, with M the Dirichlet double-precision operator. | ||||||
|  |   void dOmegaBar    (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField src(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(src,OmegaBar); | ||||||
|  |     DirichletFermOpD.M(src,out); | ||||||
|  |     ProjectDomain(out,OmegaBar); | ||||||
|  |   }; | ||||||
|  |   // out = P_Omega * Mdag * P_Omega * in, with Mdag from the Dirichlet double-precision operator. | ||||||
|  |   void dOmegaDag       (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField src(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(src,Omega); | ||||||
|  |     DirichletFermOpD.Mdag(src,out); | ||||||
|  |     ProjectDomain(out,Omega); | ||||||
|  |   }; | ||||||
|  |   // out = P_OmegaBar * Mdag * P_OmegaBar * in, with Mdag from the Dirichlet double-precision operator. | ||||||
|  |   void dOmegaBarDag    (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField src(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(src,OmegaBar); | ||||||
|  |     DirichletFermOpD.Mdag(src,out); | ||||||
|  |     ProjectDomain(out,OmegaBar); | ||||||
|  |   }; | ||||||
|  |   // Applies the Dirichlet inverse to the Omega part of 'in' and keeps the Omega part of the result. | ||||||
|  |   void dOmegaInv   (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField rhs(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(rhs,Omega); | ||||||
|  |     // Flagged as inefficient in the original: this calls the combined | ||||||
|  |     // Omega / OmegaBar solve even though only the Omega part is kept. | ||||||
|  |     dOmegaInvAndOmegaBarInv(rhs,out); | ||||||
|  |     ProjectDomain(out,Omega); | ||||||
|  |   }; | ||||||
|  |   // Applies the Dirichlet inverse to the OmegaBar part of 'in' and keeps the OmegaBar part of the result. | ||||||
|  |   void dOmegaBarInv(FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField rhs(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(rhs,OmegaBar); | ||||||
|  |     dOmegaInvAndOmegaBarInv(rhs,out); | ||||||
|  |     ProjectDomain(out,OmegaBar); | ||||||
|  |   }; | ||||||
|  |   // Applies the daggered Dirichlet inverse to the Omega part of 'in' and keeps the Omega part of the result. | ||||||
|  |   void dOmegaDagInv   (FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField rhs(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(rhs,Omega); | ||||||
|  |     dOmegaDagInvAndOmegaBarDagInv(rhs,out); | ||||||
|  |     ProjectDomain(out,Omega); | ||||||
|  |   }; | ||||||
|  |   // Applies the daggered Dirichlet inverse to the OmegaBar part of 'in' and keeps the OmegaBar part of the result. | ||||||
|  |   void dOmegaBarDagInv(FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     FermionField rhs(in);           // projected copy of the source | ||||||
|  |     ProjectDomain(rhs,OmegaBar); | ||||||
|  |     dOmegaDagInvAndOmegaBarDagInv(rhs,out); | ||||||
|  |     ProjectDomain(out,OmegaBar); | ||||||
|  |   }; | ||||||
|  |   // Solves with the Dirichlet operator for the whole field 'in', writing the solution to 'out'. | ||||||
|  |   // A mixed-precision CG (built fresh on every call from the current tol / tolinner / | ||||||
|  |   // maxinnerit / maxouterit members) is wrapped in a red-black Schur DiagMooee solve. | ||||||
|  |   void dOmegaInvAndOmegaBarInv(FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     MxPCG MixedCG(tol, tolinner, | ||||||
|  |                   maxinnerit, maxouterit, | ||||||
|  |                   DirichletFermOpF.FermionRedBlackGrid(), | ||||||
|  |                   DirichletFermOpF, DirichletFermOpD, | ||||||
|  |                   DirichletLinOpF,  DirichletLinOpD); | ||||||
|  |  | ||||||
|  |     SchurRedBlackDiagMooeeSolve<FermionField> SchurSolve(MixedCG); | ||||||
|  |     SchurSolve(DirichletFermOpD,in,out); | ||||||
|  |   }; | ||||||
|  |   // Daggered counterpart of the combined Dirichlet solve: same mixed-precision CG set-up, | ||||||
|  |   // but using the daggered Schur linear operators and the daggered red-black Schur solve. | ||||||
|  |   void dOmegaDagInvAndOmegaBarDagInv(FermionField &in,FermionField &out) | ||||||
|  |   { | ||||||
|  |     MxDagPCG MixedDagCG(tol, tolinner, | ||||||
|  |                         maxinnerit, maxouterit, | ||||||
|  |                         DirichletFermOpF.FermionRedBlackGrid(), | ||||||
|  |                         DirichletFermOpF,   DirichletFermOpD, | ||||||
|  |                         DirichletLinOpDagF, DirichletLinOpDagD); | ||||||
|  |  | ||||||
|  |     SchurRedBlackDiagMooeeDagSolve<FermionField> SchurDagSolve(MixedDagCG); | ||||||
|  |     SchurDagSolve(DirichletFermOpD,in,out); | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   // Rdag = Pdbar - DdbarDag DomegabarDagInv  DdDag DomegaDagInv Pdbar  | ||||||
|  |   void RDag(FermionField &in,FermionField &out) /* out = Pdbar in - DdbarDag DomegabarDagInv DdDag DomegaDagInv Pdbar in, evaluated right to left */ | ||||||
|  |   { | ||||||
|  |     FermionField tmp1(PeriodicFermOpD.FermionGrid()); /* two scratch fields on the periodic operator's fermion grid, used ping-pong below */ | ||||||
|  |     FermionField tmp2(PeriodicFermOpD.FermionGrid()); | ||||||
|  |     out = in; | ||||||
|  |     ProjectBoundaryBar(out); /* out now holds the Pdbar in term */ | ||||||
|  |     dOmegaDagInv(out,tmp1); /* tmp1 = DomegaDagInv out */ | ||||||
|  |     dBoundaryDag(tmp1,tmp2); /* tmp2 = DdDag tmp1 */ | ||||||
|  |     dOmegaBarDagInv(tmp2,tmp1); /* tmp1 = DomegabarDagInv tmp2 */ | ||||||
|  |     dBoundaryBarDag(tmp1,tmp2); /* tmp2 = DdbarDag tmp1 */ | ||||||
|  |     out = out - tmp2; /* subtract from the projected input; no further projection is applied afterwards */ | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   // R = Pdbar - Pdbar DomegaInv Dd DomegabarInv Ddbar | ||||||
|  |   void R(FermionField &in,FermionField &out) /* out = Pdbar ( in - DomegaInv Dd DomegabarInv Ddbar Pdbar in ), evaluated right to left */ | ||||||
|  |   { | ||||||
|  |     FermionField tmp1(PeriodicFermOpD.FermionGrid()); /* two scratch fields on the periodic operator's fermion grid, used ping-pong below */ | ||||||
|  |     FermionField tmp2(PeriodicFermOpD.FermionGrid()); | ||||||
|  |     out = in; | ||||||
|  |     ProjectBoundaryBar(out); /* out temporarily holds Pdbar in, the input of the operator chain */ | ||||||
|  |     dBoundaryBar(out,tmp1); /* tmp1 = Ddbar out */ | ||||||
|  |     dOmegaBarInv(tmp1,tmp2); /* tmp2 = DomegabarInv tmp1 */ | ||||||
|  |     dBoundary(tmp2,tmp1); /* tmp1 = Dd tmp2 */ | ||||||
|  |     dOmegaInv(tmp1,tmp2); /* tmp2 = DomegaInv tmp1 */ | ||||||
|  |     out = in - tmp2 ; /* uses the unprojected in; the projection is applied to the difference on the next line */ | ||||||
|  |     ProjectBoundaryBar(out); | ||||||
|  |     //    DumpSliceNorm("R",out,out.Grid()->Nd()-1); | ||||||
|  |   }; | ||||||
|  |    | ||||||
|  |   // RInv = Pdbar - Pdbar Dinv Ddbar  | ||||||
|  |   void RInv(FermionField &in,FermionField &out) /* out = Pdbar ( in - Dinverse( dBoundaryBar(in) ) ) */ | ||||||
|  |   { | ||||||
|  |     FermionField tmp1(PeriodicFermOpD.FermionGrid()); /* scratch field on the periodic operator's fermion grid */ | ||||||
|  |     dBoundaryBar(in,out); /* out is used as scratch here; in is passed without a prior projection */ | ||||||
|  |     Dinverse(out,tmp1); /* tmp1 = result of the periodic-operator solve on out */ | ||||||
|  |     out =in -tmp1; /* out is overwritten with the difference */ | ||||||
|  |     ProjectBoundaryBar(out); /* projection is applied last, to the whole difference */ | ||||||
|  |   }; | ||||||
|  |   // RDagInv = Pdbar - DdbarDag DinvDag Pdbar  | ||||||
|  |   void RDagInv(FermionField &in,FermionField &out) /* out = Pin - dBoundaryBarDag( DinverseDag(Pin) ), with Pin = ProjectBoundaryBar applied to a copy of in */ | ||||||
|  |   { | ||||||
|  |     FermionField tmp(PeriodicFermOpD.FermionGrid()); /* scratch fields on the periodic operator's fermion grid */ | ||||||
|  |     FermionField Pin(PeriodicFermOpD.FermionGrid()); | ||||||
|  |     Pin = in; ProjectBoundaryBar(Pin); /* projection is applied first, on the copy; in itself is not modified here */ | ||||||
|  |     DinverseDag(Pin,out); /* out is used as scratch for the daggered periodic solve */ | ||||||
|  |     dBoundaryBarDag(out,tmp); /* tmp = dBoundaryBarDag applied to the solve result */ | ||||||
|  |     out =Pin -tmp; /* no projection is applied after the subtraction */ | ||||||
|  |   }; | ||||||
|  |   // Non-dirichlet inverter using red-black preconditioning | ||||||
|  |   void Dinverse(FermionField &in,FermionField &out) /* Same construction as dOmegaInvAndOmegaBarInv, but on the Periodic operators instead of the Dirichlet ones */ | ||||||
|  |   { | ||||||
|  |     MxPCG DSolver(tol, /* MxPCG is declared outside this view -- presumably a mixed-precision CG, confirm; a fresh solver is constructed on every call */ | ||||||
|  | 		  tolinner, | ||||||
|  | 		  maxinnerit, | ||||||
|  | 		  maxouterit, | ||||||
|  | 		  PeriodicFermOpF.FermionRedBlackGrid(), /* red-black grid is taken from the F operator */ | ||||||
|  | 		  PeriodicFermOpF, /* F/D suffixes presumably denote single/double precision -- confirm */ | ||||||
|  | 		  PeriodicFermOpD, | ||||||
|  | 		  PeriodicLinOpF, | ||||||
|  | 		  PeriodicLinOpD); | ||||||
|  |     SchurRedBlackDiagMooeeSolve<FermionField> Solve(DSolver); /* the solver is handed to the Schur red-black wrapper */ | ||||||
|  |     Solve(PeriodicFermOpD,in,out); /* the solve itself is issued against the D operator */ | ||||||
|  |   } | ||||||
|  |   void DinverseDag(FermionField &in,FermionField &out) /* Daggered counterpart of Dinverse: Periodic operators with the Dag solver, Dag LinOps and Dag Schur wrapper */ | ||||||
|  |   { | ||||||
|  |     MxDagPCG DdagSolver(tol, /* MxDagPCG is declared outside this view -- presumably the daggered mixed-precision CG, confirm; a fresh solver is constructed on every call */ | ||||||
|  | 			tolinner, | ||||||
|  | 			maxinnerit, | ||||||
|  | 			maxouterit, | ||||||
|  | 			PeriodicFermOpF.FermionRedBlackGrid(), /* red-black grid is taken from the F operator */ | ||||||
|  | 			PeriodicFermOpF, | ||||||
|  | 			PeriodicFermOpD, | ||||||
|  | 			PeriodicLinOpDagF, /* the Dag variants of the linear operators are passed here */ | ||||||
|  | 			PeriodicLinOpDagD); | ||||||
|  |     SchurRedBlackDiagMooeeDagSolve<FermionField> Solve(DdagSolver); /* the solver is handed to the Dag Schur red-black wrapper */ | ||||||
|  |     Solve(PeriodicFermOpD,in,out); /* the solve itself is issued against the D operator */ | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
| @@ -68,11 +68,12 @@ public: | |||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Compress includes precision change if mpi data is not same */ |   /* Compress includes precision change if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   template<class _SiteHalfSpinor, class _SiteSpinor> |   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||||
|   accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { |     typedef decltype(coalescedRead(buf)) sobj; | ||||||
|     _SiteHalfSpinor tmp; |     sobj sp; | ||||||
|     projector::Proj(tmp,in,mu,dag); |     auto sin = coalescedRead(in); | ||||||
|     vstream(buf[o],tmp); |     projector::Proj(sp,sin,mu,dag); | ||||||
|  |     coalescedWrite(buf,sp); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -82,13 +83,18 @@ public: | |||||||
| 				   const SiteHalfSpinor * __restrict__ vp0, | 				   const SiteHalfSpinor * __restrict__ vp0, | ||||||
| 				   const SiteHalfSpinor * __restrict__ vp1, | 				   const SiteHalfSpinor * __restrict__ vp1, | ||||||
| 				   Integer type,Integer o) const { | 				   Integer type,Integer o) const { | ||||||
|  | #ifdef GRID_SIMT | ||||||
|  |     exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); | ||||||
|  | #else | ||||||
|     SiteHalfSpinor tmp1; |     SiteHalfSpinor tmp1; | ||||||
|     SiteHalfSpinor tmp2; |     SiteHalfSpinor tmp2; | ||||||
|     exchange(tmp1,tmp2,vp0[o],vp1[o],type); |     exchange(tmp1,tmp2,vp0[o],vp1[o],type); | ||||||
|     vstream(mp[2*o  ],tmp1); |     vstream(mp[2*o  ],tmp1); | ||||||
|     vstream(mp[2*o+1],tmp2); |     vstream(mp[2*o+1],tmp2); | ||||||
|  | #endif | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Have a decompression step if mpi data is not same */ |   /* Have a decompression step if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -105,6 +111,28 @@ public: | |||||||
| 					   const SiteSpinor * __restrict__ in, | 					   const SiteSpinor * __restrict__ in, | ||||||
| 					   Integer j,Integer k, Integer m,Integer type) const | 					   Integer j,Integer k, Integer m,Integer type) const | ||||||
|   { |   { | ||||||
|  | #ifdef GRID_SIMT | ||||||
|  |     typedef SiteSpinor vobj; | ||||||
|  |     typedef SiteHalfSpinor hvobj; | ||||||
|  |     typedef decltype(coalescedRead(*in))    sobj; | ||||||
|  |     typedef decltype(coalescedRead(*out0)) hsobj; | ||||||
|  |  | ||||||
|  |     unsigned int Nsimd = vobj::Nsimd(); | ||||||
|  |     unsigned int mask = Nsimd >> (type + 1); | ||||||
|  |     int lane = acceleratorSIMTlane(Nsimd); | ||||||
|  |     int j0 = lane &(~mask); // inner coor zero | ||||||
|  |     int j1 = lane |(mask) ; // inner coor one | ||||||
|  |     const vobj *vp0 = &in[k]; | ||||||
|  |     const vobj *vp1 = &in[m]; | ||||||
|  |     const vobj *vp = (lane&mask) ? vp1:vp0; | ||||||
|  |     auto sa = coalescedRead(*vp,j0); | ||||||
|  |     auto sb = coalescedRead(*vp,j1); | ||||||
|  |     hsobj psa, psb; | ||||||
|  |     projector::Proj(psa,sa,mu,dag); | ||||||
|  |     projector::Proj(psb,sb,mu,dag); | ||||||
|  |     coalescedWrite(out0[j],psa); | ||||||
|  |     coalescedWrite(out1[j],psb); | ||||||
|  | #else | ||||||
|     SiteHalfSpinor temp1, temp2; |     SiteHalfSpinor temp1, temp2; | ||||||
|     SiteHalfSpinor temp3, temp4; |     SiteHalfSpinor temp3, temp4; | ||||||
|     projector::Proj(temp1,in[k],mu,dag); |     projector::Proj(temp1,in[k],mu,dag); | ||||||
| @@ -112,6 +140,7 @@ public: | |||||||
|     exchange(temp3,temp4,temp1,temp2,type); |     exchange(temp3,temp4,temp1,temp2,type); | ||||||
|     vstream(out0[j],temp3); |     vstream(out0[j],temp3); | ||||||
|     vstream(out1[j],temp4); |     vstream(out1[j],temp4); | ||||||
|  | #endif | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -121,6 +150,7 @@ public: | |||||||
|  |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | #if 0 | ||||||
| template<class _HCspinor,class _Hspinor,class _Spinor, class projector> | template<class _HCspinor,class _Hspinor,class _Spinor, class projector> | ||||||
| class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, | class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, | ||||||
| 				typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > | 				typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > | ||||||
| @@ -149,13 +179,23 @@ public: | |||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Compress includes precision change if mpi data is not same */ |   /* Compress includes precision change if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   template<class _SiteHalfSpinor, class _SiteSpinor> |   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||||
|   accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { |     SiteHalfSpinor hsp; | ||||||
|     _SiteHalfSpinor hsp; |  | ||||||
|     SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; |     SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; | ||||||
|     projector::Proj(hsp,in,mu,dag); |     projector::Proj(hsp,in,mu,dag); | ||||||
|     precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); |     precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); | ||||||
|   } |   } | ||||||
|  |   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||||
|  | #ifdef GRID_SIMT | ||||||
|  |     typedef decltype(coalescedRead(buf)) sobj; | ||||||
|  |     sobj sp; | ||||||
|  |     auto sin = coalescedRead(in); | ||||||
|  |     projector::Proj(sp,sin,mu,dag); | ||||||
|  |     coalescedWrite(buf,sp); | ||||||
|  | #else | ||||||
|  |     projector::Proj(buf,in,mu,dag); | ||||||
|  | #endif | ||||||
|  |   } | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Exchange includes precision change if mpi data is not same */ |   /* Exchange includes precision change if mpi data is not same */ | ||||||
| @@ -203,6 +243,7 @@ public: | |||||||
|   accelerator_inline bool DecompressionStep(void) const { return true; } |   accelerator_inline bool DecompressionStep(void) const { return true; } | ||||||
|  |  | ||||||
| }; | }; | ||||||
|  | #endif | ||||||
|  |  | ||||||
| #define DECLARE_PROJ(Projector,Compressor,spProj)			\ | #define DECLARE_PROJ(Projector,Compressor,spProj)			\ | ||||||
|   class Projector {							\ |   class Projector {							\ | ||||||
| @@ -253,33 +294,8 @@ public: | |||||||
|   typedef typename Base::View_type View_type; |   typedef typename Base::View_type View_type; | ||||||
|   typedef typename Base::StencilVector StencilVector; |   typedef typename Base::StencilVector StencilVector; | ||||||
|  |  | ||||||
|   double timer0; |   void ZeroCountersi(void)  {  } | ||||||
|   double timer1; |   void Reporti(int calls)  {  } | ||||||
|   double timer2; |  | ||||||
|   double timer3; |  | ||||||
|   double timer4; |  | ||||||
|   double timer5; |  | ||||||
|   double timer6; |  | ||||||
|   uint64_t callsi; |  | ||||||
|   void ZeroCountersi(void) |  | ||||||
|   { |  | ||||||
|     timer0=0; |  | ||||||
|     timer1=0; |  | ||||||
|     timer2=0; |  | ||||||
|     timer3=0; |  | ||||||
|     timer4=0; |  | ||||||
|     timer5=0; |  | ||||||
|     timer6=0; |  | ||||||
|     callsi=0; |  | ||||||
|   } |  | ||||||
|   void Reporti(int calls) |  | ||||||
|   { |  | ||||||
|     if ( timer0 ) std::cout << GridLogMessage << " timer0 (HaloGatherOpt) " <<timer0/calls <<std::endl; |  | ||||||
|     if ( timer1 ) std::cout << GridLogMessage << " timer1 (Communicate)   " <<timer1/calls <<std::endl; |  | ||||||
|     if ( timer2 ) std::cout << GridLogMessage << " timer2 (CommsMerge )   " <<timer2/calls <<std::endl; |  | ||||||
|     if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " <<timer3/calls <<std::endl; |  | ||||||
|     if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::vector<int> surface_list; |   std::vector<int> surface_list; | ||||||
|  |  | ||||||
| @@ -287,8 +303,10 @@ public: | |||||||
| 		int npoints, | 		int npoints, | ||||||
| 		int checkerboard, | 		int checkerboard, | ||||||
| 		const std::vector<int> &directions, | 		const std::vector<int> &directions, | ||||||
| 		const std::vector<int> &distances,Parameters p)   | 		const std::vector<int> &distances, | ||||||
|     : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)  | 		bool locally_periodic, | ||||||
|  | 		Parameters p)   | ||||||
|  |     : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,locally_periodic,p) | ||||||
|   { |   { | ||||||
|     ZeroCountersi(); |     ZeroCountersi(); | ||||||
|     surface_list.resize(0); |     surface_list.resize(0); | ||||||
| @@ -321,26 +339,18 @@ public: | |||||||
|   { |   { | ||||||
|     std::vector<std::vector<CommsRequest_t> > reqs; |     std::vector<std::vector<CommsRequest_t> > reqs; | ||||||
|     this->HaloExchangeOptGather(source,compress); |     this->HaloExchangeOptGather(source,compress); | ||||||
|     double t1=usecond(); |  | ||||||
|     // Asynchronous MPI calls multidirectional, Isend etc... |     // Asynchronous MPI calls multidirectional, Isend etc... | ||||||
|     // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. |     // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. | ||||||
|     this->Communicate(); |     this->Communicate(); | ||||||
|     double t2=usecond(); timer1 += t2-t1; |  | ||||||
|     this->CommsMerge(compress); |     this->CommsMerge(compress); | ||||||
|     double t3=usecond(); timer2 += t3-t2; |  | ||||||
|     this->CommsMergeSHM(compress); |     this->CommsMergeSHM(compress); | ||||||
|     double t4=usecond(); timer3 += t4-t3; |  | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <class compressor> |   template <class compressor> | ||||||
|   void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)  |   void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)  | ||||||
|   { |   { | ||||||
|     this->Prepare(); |     this->Prepare(); | ||||||
|     double t0=usecond(); |  | ||||||
|     this->HaloGatherOpt(source,compress); |     this->HaloGatherOpt(source,compress); | ||||||
|     double t1=usecond(); |  | ||||||
|     timer0 += t1-t0; |  | ||||||
|     callsi++; |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   template <class compressor> |   template <class compressor> | ||||||
| @@ -352,12 +362,9 @@ public: | |||||||
|     typedef typename compressor::SiteHalfSpinor     SiteHalfSpinor; |     typedef typename compressor::SiteHalfSpinor     SiteHalfSpinor; | ||||||
|     typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; |     typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; | ||||||
|  |  | ||||||
|     this->mpi3synctime_g-=usecond(); |  | ||||||
|     this->_grid->StencilBarrier(); |     this->_grid->StencilBarrier(); | ||||||
|     this->mpi3synctime_g+=usecond(); |  | ||||||
|  |  | ||||||
|     assert(source.Grid()==this->_grid); |     assert(source.Grid()==this->_grid); | ||||||
|     this->halogtime-=usecond(); |  | ||||||
|      |      | ||||||
|     this->u_comm_offset=0; |     this->u_comm_offset=0; | ||||||
|        |        | ||||||
| @@ -393,7 +400,6 @@ public: | |||||||
|     } |     } | ||||||
|     this->face_table_computed=1; |     this->face_table_computed=1; | ||||||
|     assert(this->u_comm_offset==this->_unified_buffer_size); |     assert(this->u_comm_offset==this->_unified_buffer_size); | ||||||
|     this->halogtime+=usecond(); |  | ||||||
|     accelerator_barrier(); |     accelerator_barrier(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -146,6 +146,9 @@ public: | |||||||
|   void DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, |   void DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||||
|                     const FermionField &in, FermionField &out, int dag); |                     const FermionField &in, FermionField &out, int dag); | ||||||
|  |  | ||||||
|  |   void DhopInternalDirichletComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||||
|  | 				  const FermionField &in, FermionField &out, int dag); | ||||||
|  |    | ||||||
|   void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, |   void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||||
| 				   const FermionField &in, FermionField &out, int dag); | 				   const FermionField &in, FermionField &out, int dag); | ||||||
|  |  | ||||||
| @@ -157,6 +160,9 @@ public: | |||||||
|  |  | ||||||
|   // DoubleStore impl dependent |   // DoubleStore impl dependent | ||||||
|   void ImportGauge(const GaugeField &_Umu); |   void ImportGauge(const GaugeField &_Umu); | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; | ||||||
|    |    | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   // Data members required to support the functionality |   // Data members required to support the functionality | ||||||
|   | |||||||
| @@ -166,6 +166,13 @@ public: | |||||||
| 			       FermionField &out, | 			       FermionField &out, | ||||||
| 			       int dag); | 			       int dag); | ||||||
|  |  | ||||||
|  |   void DhopInternalDirichletComms(StencilImpl & st, | ||||||
|  | 				  LebesgueOrder &lo, | ||||||
|  | 				  DoubledGaugeField &U, | ||||||
|  | 				  const FermionField &in,  | ||||||
|  | 				  FermionField &out, | ||||||
|  | 				  int dag); | ||||||
|  |    | ||||||
|   // Constructors |   // Constructors | ||||||
|   WilsonFermion5D(GaugeField &_Umu, |   WilsonFermion5D(GaugeField &_Umu, | ||||||
| 		  GridCartesian         &FiveDimGrid, | 		  GridCartesian         &FiveDimGrid, | ||||||
| @@ -174,19 +181,11 @@ public: | |||||||
| 		  GridRedBlackCartesian &FourDimRedBlackGrid, | 		  GridRedBlackCartesian &FourDimRedBlackGrid, | ||||||
| 		  double _M5,const ImplParams &p= ImplParams()); | 		  double _M5,const ImplParams &p= ImplParams()); | ||||||
|      |      | ||||||
|   // Constructors |  | ||||||
|   /* |  | ||||||
|     WilsonFermion5D(int simd,  |  | ||||||
|     GaugeField &_Umu, |  | ||||||
|     GridCartesian         &FiveDimGrid, |  | ||||||
|     GridRedBlackCartesian &FiveDimRedBlackGrid, |  | ||||||
|     GridCartesian         &FourDimGrid, |  | ||||||
|     double _M5,const ImplParams &p= ImplParams()); |  | ||||||
|   */ |  | ||||||
|      |  | ||||||
|   // DoubleStore |   // DoubleStore | ||||||
|   void ImportGauge(const GaugeField &_Umu); |   void ImportGauge(const GaugeField &_Umu); | ||||||
|      |   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; | ||||||
|  |   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   // Data members required to support the functionality |   // Data members required to support the functionality | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -243,17 +243,17 @@ typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffReal > WilsonImplR | |||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF;  // Float | typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF;  // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD;  // Double | typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD;  // Double | ||||||
|  |  | ||||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL;  // Real.. whichever prec | //typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL;  // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH;  // Float | //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH;  // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF;  // Double | //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF;  // Double | ||||||
|  |  | ||||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec | typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float | typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double | typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double | ||||||
|  |  | ||||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec | //typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float | //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double | //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double | ||||||
|   |   | ||||||
| typedef WilsonImpl<vComplex,  AdjointRepresentation, CoeffReal > WilsonAdjImplR;   // Real.. whichever prec | typedef WilsonImpl<vComplex,  AdjointRepresentation, CoeffReal > WilsonAdjImplR;   // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF;  // Float | typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF;  // Float | ||||||
|   | |||||||
| @@ -39,7 +39,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
| class WilsonKernelsStatic {  | class WilsonKernelsStatic {  | ||||||
| public: | public: | ||||||
|   enum { OptGeneric, OptHandUnroll, OptInlineAsm }; |   enum { OptGeneric, OptHandUnroll, OptInlineAsm }; | ||||||
|   enum { CommsAndCompute, CommsThenCompute }; |   enum { CommsAndCompute, CommsThenCompute, CommsDirichlet }; | ||||||
|   static int Opt;   |   static int Opt;   | ||||||
|   static int Comms; |   static int Comms; | ||||||
| }; | }; | ||||||
|   | |||||||
| @@ -112,7 +112,6 @@ void CayleyFermion5D<Impl>::ImportUnphysicalFermion(const FermionField &input4d, | |||||||
|   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); |   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); | ||||||
|   imported5d=tmp; |   imported5d=tmp; | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class Impl>   | template<class Impl>   | ||||||
| void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) | void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) | ||||||
| { | { | ||||||
| @@ -127,6 +126,37 @@ void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &inpu | |||||||
|   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); |   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); | ||||||
|   Dminus(tmp,imported5d); |   Dminus(tmp,imported5d); | ||||||
| } | } | ||||||
|  | //////////////////////////////////////////////////// | ||||||
|  | // Added for fourD pseudofermion det estimation | ||||||
|  | //////////////////////////////////////////////////// | ||||||
|  | template<class Impl>   | ||||||
|  | void CayleyFermion5D<Impl>::ImportFourDimPseudoFermion(const FermionField &input4d,FermionField &imported5d) /* Builds a 5d field from input4d: the 4d field is placed on slices 0 and Ls-1, every other slice stays zero */ | ||||||
|  | { | ||||||
|  |   int Ls = this->Ls; | ||||||
|  |   FermionField tmp(this->FermionGrid()); /* 5d work field; copied to imported5d at the end */ | ||||||
|  |   conformable(imported5d.Grid(),this->FermionGrid()); /* output is checked against the 5d fermion grid */ | ||||||
|  |   conformable(input4d.Grid()   ,this->GaugeGrid()); /* input is checked against the gauge grid */ | ||||||
|  |   tmp = Zero(); | ||||||
|  |   InsertSlice(input4d, tmp, 0   , 0); /* copy of input4d on slice 0 */ | ||||||
|  |   InsertSlice(input4d, tmp, Ls-1, 0); /* copy of input4d on slice Ls-1 */ | ||||||
|  |   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, 0, 0); /* presumably replaces slice 0 by its P- projection (helper not visible here) -- confirm */ | ||||||
|  |   axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); /* presumably replaces slice Ls-1 by its P+ projection -- confirm */ | ||||||
|  |   imported5d=tmp; | ||||||
|  | } | ||||||
|  | template<class Impl>   | ||||||
|  | void CayleyFermion5D<Impl>::ExportFourDimPseudoFermion(const FermionField &solution5d,FermionField &exported4d) /* Combines slices 0 and Ls-1 of solution5d into slice 0 of a work field and extracts that slice as exported4d */ | ||||||
|  | { | ||||||
|  |   int Ls = this->Ls; | ||||||
|  |   FermionField tmp(this->FermionGrid()); | ||||||
|  |   tmp = solution5d; /* note: this copy runs before the conformable checks below */ | ||||||
|  |   conformable(solution5d.Grid(),this->FermionGrid()); /* input is checked against the 5d fermion grid */ | ||||||
|  |   conformable(exported4d.Grid(),this->GaugeGrid()); /* output is checked against the gauge grid */ | ||||||
|  |   axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0); /* presumably slice 0 of tmp = P- of slice 0 of solution5d (helper not visible here) -- confirm */ | ||||||
|  |   axpby_ssp_pplus (tmp, 1., tmp       , 1., solution5d, 0, Ls-1); /* presumably adds P+ of slice Ls-1 of solution5d onto slice 0 of tmp -- confirm */ | ||||||
|  |   ExtractSlice(exported4d, tmp, 0, 0); /* only slice 0 of tmp is exported */ | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Dminus | ||||||
| template<class Impl>   | template<class Impl>   | ||||||
| void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi) | void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi) | ||||||
| { | { | ||||||
| @@ -880,7 +910,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   std::vector<RealD> G_s(Ls,1.0); |   std::vector<RealD> G_s(Ls,1.0); | ||||||
|   Integer sign = 1; // sign flip for vector/tadpole |   RealD sign = 1; // sign flip for vector/tadpole | ||||||
|   if ( curr_type == Current::Axial ) { |   if ( curr_type == Current::Axial ) { | ||||||
|     for(int s=0;s<Ls/2;s++){ |     for(int s=0;s<Ls/2;s++){ | ||||||
|       G_s[s] = -1.0; |       G_s[s] = -1.0; | ||||||
| @@ -901,8 +931,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, | |||||||
|   for(int s=0;s<Ls;s++){ |   for(int s=0;s<Ls;s++){ | ||||||
|  |  | ||||||
|     int sp = (s+1)%Ls; |     int sp = (s+1)%Ls; | ||||||
|     int sr = Ls-1-s; |     //    int sr = Ls-1-s; | ||||||
|     int srp= (sr+1)%Ls; |     //    int srp= (sr+1)%Ls; | ||||||
|  |  | ||||||
|     // Mobius parameters |     // Mobius parameters | ||||||
|     auto b=this->bs[s]; |     auto b=this->bs[s]; | ||||||
|   | |||||||
| @@ -51,9 +51,9 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu, | |||||||
|   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), |   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), | ||||||
|   _FourDimGrid        (&FourDimGrid), |   _FourDimGrid        (&FourDimGrid), | ||||||
|   _FourDimRedBlackGrid(&FourDimRedBlackGrid), |   _FourDimRedBlackGrid(&FourDimRedBlackGrid), | ||||||
|   Stencil    (_FiveDimGrid,npoint,Even,directions,displacements,p), |   Stencil    (_FiveDimGrid,npoint,Even,directions,displacements,p.locally_periodic,p), | ||||||
|   StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even |   StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p.locally_periodic,p), // source is Even | ||||||
|   StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd |   StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p.locally_periodic,p), // source is Odd | ||||||
|   M5(_M5), |   M5(_M5), | ||||||
|   Umu(_FourDimGrid), |   Umu(_FourDimGrid), | ||||||
|   UmuEven(_FourDimRedBlackGrid), |   UmuEven(_FourDimRedBlackGrid), | ||||||
| @@ -361,10 +361,21 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, | |||||||
|                                          const FermionField &in, FermionField &out,int dag) |                                          const FermionField &in, FermionField &out,int dag) | ||||||
| { | { | ||||||
|   DhopTotalTime-=usecond(); |   DhopTotalTime-=usecond(); | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) |  | ||||||
|  |   assert(  (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute) | ||||||
|  | 	 ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) | ||||||
|  |          ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet) ); | ||||||
|  |  | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) { | ||||||
|     DhopInternalOverlappedComms(st,lo,U,in,out,dag); |     DhopInternalOverlappedComms(st,lo,U,in,out,dag); | ||||||
|   else  |   } | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute ) { | ||||||
|     DhopInternalSerialComms(st,lo,U,in,out,dag); |     DhopInternalSerialComms(st,lo,U,in,out,dag); | ||||||
|  |   } | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet ) { | ||||||
|  |     DhopInternalDirichletComms(st,lo,U,in,out,dag); | ||||||
|  |   } | ||||||
|  |    | ||||||
|   DhopTotalTime+=usecond(); |   DhopTotalTime+=usecond(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -431,6 +442,30 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | |||||||
|   DhopComputeTime2+=usecond(); |   DhopComputeTime2+=usecond(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | template<class Impl> | ||||||
|  | void WilsonFermion5D<Impl>::DhopInternalDirichletComms(StencilImpl & st, LebesgueOrder &lo, /* Dhop variant for the CommsDirichlet mode: one kernel call, and no halo-exchange or communication call appears in this body */ | ||||||
|  | 						       DoubledGaugeField & U, | ||||||
|  | 						       const FermionField &in, FermionField &out,int dag) | ||||||
|  | { | ||||||
|  |   Compressor compressor(dag); /* NOTE(review): constructed but not used anywhere in this body */ | ||||||
|  |  | ||||||
|  |   int LLs = in.Grid()->_rdimensions[0]; /* extent of dimension 0 of the fermion grid, passed on to the kernel */ | ||||||
|  |   int len =  U.Grid()->oSites(); /* NOTE(review): not used below; the kernel calls pass U.oSites() directly */ | ||||||
|  |        | ||||||
|  |   ///////////////////////////// | ||||||
|  |   // do the compute interior | ||||||
|  |   ///////////////////////////// | ||||||
|  |   int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know | ||||||
|  |   DhopComputeTime-=usecond(); /* timer brackets the kernel call and the barrier */ | ||||||
|  |   if (dag == DaggerYes) { | ||||||
|  |     Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); /* trailing 1,0 are presumably the interior/exterior flags -- confirm against the kernel signature */ | ||||||
|  |   } else { | ||||||
|  |     Kernels::DhopKernel   (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); /* same arguments as the dagger branch, non-daggered kernel */ | ||||||
|  |   } | ||||||
|  |   accelerator_barrier(); /* called before the timer is stopped, so the barrier is included in DhopComputeTime */ | ||||||
|  |   DhopComputeTime+=usecond(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, | void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, | ||||||
|   | |||||||
| @@ -47,9 +47,9 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, | |||||||
|     Kernels(p), |     Kernels(p), | ||||||
|     _grid(&Fgrid), |     _grid(&Fgrid), | ||||||
|     _cbgrid(&Hgrid), |     _cbgrid(&Hgrid), | ||||||
|     Stencil(&Fgrid, npoint, Even, directions, displacements,p), |     Stencil(&Fgrid, npoint, Even, directions, displacements,p.locally_periodic,p), | ||||||
|     StencilEven(&Hgrid, npoint, Even, directions,displacements,p),  // source is Even |     StencilEven(&Hgrid, npoint, Even, directions,displacements,p.locally_periodic,p),  // source is Even | ||||||
|     StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p),  // source is Odd |     StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p.locally_periodic,p),  // source is Odd | ||||||
|     mass(_mass), |     mass(_mass), | ||||||
|     Lebesgue(_grid), |     Lebesgue(_grid), | ||||||
|     LebesgueEvenOdd(_cbgrid), |     LebesgueEvenOdd(_cbgrid), | ||||||
| @@ -488,12 +488,21 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, | |||||||
|                                        FermionField &out, int dag) |                                        FermionField &out, int dag) | ||||||
| { | { | ||||||
|   DhopTotalTime-=usecond(); |   DhopTotalTime-=usecond(); | ||||||
| #ifdef GRID_OMP |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) |   assert(  (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute) | ||||||
|  | 	 ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) | ||||||
|  |          ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet) ); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) { | ||||||
|     DhopInternalOverlappedComms(st,lo,U,in,out,dag); |     DhopInternalOverlappedComms(st,lo,U,in,out,dag); | ||||||
|   else |   } | ||||||
| #endif |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute ) { | ||||||
|     DhopInternalSerial(st,lo,U,in,out,dag); |     DhopInternalSerial(st,lo,U,in,out,dag); | ||||||
|  |   } | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet ) { | ||||||
|  |     DhopInternalDirichletComms(st,lo,U,in,out,dag); | ||||||
|  |   } | ||||||
|   DhopTotalTime+=usecond(); |   DhopTotalTime+=usecond(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -562,6 +571,29 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO | |||||||
|   DhopComputeTime2+=usecond(); |   DhopComputeTime2+=usecond(); | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | template <class Impl> | ||||||
|  | void WilsonFermion<Impl>::DhopInternalDirichletComms(StencilImpl &st, LebesgueOrder &lo, | ||||||
|  | 						     DoubledGaugeField &U, | ||||||
|  | 						     const FermionField &in, | ||||||
|  | 						     FermionField &out, int dag) | ||||||
|  | { | ||||||
|  |   assert((dag == DaggerNo) || (dag == DaggerYes)); | ||||||
|  |  | ||||||
|  |   Compressor compressor(dag); | ||||||
|  |   int len =  U.Grid()->oSites(); | ||||||
|  |  | ||||||
|  |   ///////////////////////////// | ||||||
|  |   // do the compute interior | ||||||
|  |   ///////////////////////////// | ||||||
|  |   int Opt = WilsonKernelsStatic::Opt; | ||||||
|  |   DhopComputeTime-=usecond(); | ||||||
|  |   if (dag == DaggerYes) { | ||||||
|  |     Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); | ||||||
|  |   } else { | ||||||
|  |     Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); | ||||||
|  |   } | ||||||
|  |   DhopComputeTime+=usecond(); | ||||||
|  | }; | ||||||
|  |  | ||||||
| template <class Impl> | template <class Impl> | ||||||
| void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, | void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, | ||||||
|   | |||||||
| @@ -73,17 +73,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -102,17 +102,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -131,17 +131,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -165,17 +165,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -194,17 +194,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -223,17 +223,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -280,17 +280,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -309,17 +309,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -338,17 +338,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| @@ -371,17 +371,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -400,17 +400,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -429,17 +429,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| template<> void | // template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -74,15 +74,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -97,15 +97,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -121,15 +121,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // XYZT vectorised, dag Kernel, single | // XYZT vectorised, dag Kernel, single | ||||||
| @@ -148,15 +148,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -171,15 +171,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -194,15 +194,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| #undef MAYBEPERM | #undef MAYBEPERM | ||||||
| #undef MULT_2SPIN | #undef MULT_2SPIN | ||||||
| @@ -228,14 +228,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeF | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -249,14 +249,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -273,15 +273,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | // | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // Ls vectorised, dag Kernel, single | // Ls vectorised, dag Kernel, single | ||||||
| @@ -299,14 +299,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGau | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -320,14 +320,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -341,14 +341,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #endif  // VEC 5D | #endif  // VEC 5D | ||||||
|  |  | ||||||
| @@ -392,14 +392,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -413,14 +413,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -434,14 +434,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // XYZT vectorised, dag Kernel, single | // XYZT vectorised, dag Kernel, single | ||||||
| @@ -459,14 +459,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -480,14 +480,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -501,14 +501,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| #undef MAYBEPERM | #undef MAYBEPERM | ||||||
| #undef MULT_2SPIN | #undef MULT_2SPIN | ||||||
| @@ -533,14 +533,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeF | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -554,14 +554,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -577,14 +577,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // Ls vectorised, dag Kernel, single | // Ls vectorised, dag Kernel, single | ||||||
| @@ -602,14 +602,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGau | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -623,14 +623,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -645,14 +645,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| template<> void  | //template<> void | ||||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #endif  // VEC 5D | #endif  // VEC 5D | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonCloverFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonKernelsInstantiationGparity.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonTMFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION GparityWilsonImplDF |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonCloverFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonKernelsInstantiationGparity.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonTMFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION GparityWilsonImplFH |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonCloverFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonTMFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| #define IMPLEMENTATION WilsonImplDF |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../CayleyFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../ContinuedFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../DomainWallEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../MobiusEOFAFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../PartialFractionFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonCloverFermionInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermion5DInstantiation.cc.master |  | ||||||
| @@ -1 +0,0 @@ | |||||||
| ../WilsonFermionInstantiation.cc.master |  | ||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user