mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-24 17:54:47 +01:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			feature/dd
			...
			feature/ad
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | b284d50863 | 
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -88,7 +88,6 @@ Thumbs.db | |||||||
| # build directory # | # build directory # | ||||||
| ################### | ################### | ||||||
| build*/* | build*/* | ||||||
| Documentation/_build |  | ||||||
|  |  | ||||||
| # IDE related files # | # IDE related files # | ||||||
| ##################### | ##################### | ||||||
|   | |||||||
| @@ -34,9 +34,6 @@ directory | |||||||
|  |  | ||||||
| #if defined __GNUC__ && __GNUC__>=6 | #if defined __GNUC__ && __GNUC__>=6 | ||||||
| #pragma GCC diagnostic ignored "-Wignored-attributes" | #pragma GCC diagnostic ignored "-Wignored-attributes" | ||||||
| #endif |  | ||||||
| #if defined __GNUC__ && __GNUC__>=6 |  | ||||||
| #pragma GCC diagnostic ignored "-Wpsabi" |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  //disables and intel compiler specific warning (in json.hpp) |  //disables and intel compiler specific warning (in json.hpp) | ||||||
|   | |||||||
| @@ -36,7 +36,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/qcd/QCD.h> | #include <Grid/qcd/QCD.h> | ||||||
| #include <Grid/qcd/spin/Spin.h> | #include <Grid/qcd/spin/Spin.h> | ||||||
| #include <Grid/qcd/gparity/Gparity.h> |  | ||||||
| #include <Grid/qcd/utils/Utils.h> | #include <Grid/qcd/utils/Utils.h> | ||||||
| #include <Grid/qcd/representations/Representations.h> | #include <Grid/qcd/representations/Representations.h> | ||||||
| NAMESPACE_CHECK(GridQCDCore); | NAMESPACE_CHECK(GridQCDCore); | ||||||
|   | |||||||
| @@ -54,7 +54,6 @@ NAMESPACE_CHECK(BiCGSTAB); | |||||||
| #include <Grid/algorithms/iterative/SchurRedBlack.h> | #include <Grid/algorithms/iterative/SchurRedBlack.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h> |  | ||||||
| #include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h> | #include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h> | ||||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||||
| #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> | #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> | ||||||
|   | |||||||
| @@ -442,8 +442,6 @@ public: | |||||||
|     for(int p=0; p<geom.npoint; p++) |     for(int p=0; p<geom.npoint; p++) | ||||||
|       points[p] = geom.points_dagger[p]; |       points[p] = geom.points_dagger[p]; | ||||||
|  |  | ||||||
|     auto points_p = &points[0]; |  | ||||||
|  |  | ||||||
|     RealD* dag_factor_p = &dag_factor[0]; |     RealD* dag_factor_p = &dag_factor[0]; | ||||||
|  |  | ||||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { |     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||||
| @@ -455,7 +453,7 @@ public: | |||||||
|       StencilEntry *SE; |       StencilEntry *SE; | ||||||
|  |  | ||||||
|       for(int p=0;p<geom_v.npoint;p++){ |       for(int p=0;p<geom_v.npoint;p++){ | ||||||
|         int point = points_p[p]; |         int point = points[p]; | ||||||
|  |  | ||||||
| 	SE=Stencil_v.GetEntry(ptype,point,ss); | 	SE=Stencil_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
| @@ -710,8 +708,6 @@ public: | |||||||
|     for(int p=0; p<npoint; p++) |     for(int p=0; p<npoint; p++) | ||||||
|       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; |       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; | ||||||
|  |  | ||||||
|     auto points_p = &points[0]; |  | ||||||
|  |  | ||||||
|     Vector<Aview> AcceleratorViewContainer; |     Vector<Aview> AcceleratorViewContainer; | ||||||
|     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); |     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); | ||||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; |     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||||
| @@ -732,7 +728,7 @@ public: | |||||||
|         StencilEntry *SE; |         StencilEntry *SE; | ||||||
|  |  | ||||||
|         for(int p=0;p<npoint;p++){ |         for(int p=0;p<npoint;p++){ | ||||||
|           int point = points_p[p]; |           int point = points[p]; | ||||||
|           SE=st_v.GetEntry(ptype,point,ss); |           SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|           if(SE->_is_local) { |           if(SE->_is_local) { | ||||||
| @@ -758,7 +754,7 @@ public: | |||||||
|         StencilEntry *SE; |         StencilEntry *SE; | ||||||
|  |  | ||||||
|         for(int p=0;p<npoint;p++){ |         for(int p=0;p<npoint;p++){ | ||||||
|           int point = points_p[p]; |           int point = points[p]; | ||||||
|           SE=st_v.GetEntry(ptype,point,ss); |           SE=st_v.GetEntry(ptype,point,ss); | ||||||
|  |  | ||||||
|           if(SE->_is_local) { |           if(SE->_is_local) { | ||||||
|   | |||||||
| @@ -136,7 +136,7 @@ public: | |||||||
|     flops=0; |     flops=0; | ||||||
|     usec =0; |     usec =0; | ||||||
|     Coordinate layout(Nd,1); |     Coordinate layout(Nd,1); | ||||||
|     sgrid = new GridCartesian(dimensions,layout,processors,*grid); |     sgrid = new GridCartesian(dimensions,layout,processors); | ||||||
|   }; |   }; | ||||||
|      |      | ||||||
|   ~FFT ( void)  { |   ~FFT ( void)  { | ||||||
| @@ -182,7 +182,7 @@ public: | |||||||
|     pencil_gd[dim] = G*processors[dim]; |     pencil_gd[dim] = G*processors[dim]; | ||||||
|        |        | ||||||
|     // Pencil global vol LxLxGxLxL per node |     // Pencil global vol LxLxGxLxL per node | ||||||
|     GridCartesian pencil_g(pencil_gd,layout,processors,*vgrid); |     GridCartesian pencil_g(pencil_gd,layout,processors); | ||||||
|        |        | ||||||
|     // Construct pencils |     // Construct pencils | ||||||
|     typedef typename vobj::scalar_object sobj; |     typedef typename vobj::scalar_object sobj; | ||||||
|   | |||||||
| @@ -223,14 +223,9 @@ class SchurOperatorBase :  public LinearOperatorBase<Field> { | |||||||
|     Mpc(in,tmp); |     Mpc(in,tmp); | ||||||
|     MpcDag(tmp,out); |     MpcDag(tmp,out); | ||||||
|   } |   } | ||||||
|   virtual  void MpcMpcDag(const Field &in, Field &out) { |  | ||||||
|     Field tmp(in.Grid()); |  | ||||||
|     tmp.Checkerboard() = in.Checkerboard(); |  | ||||||
|     MpcDag(in,tmp); |  | ||||||
|     Mpc(tmp,out); |  | ||||||
|   } |  | ||||||
|   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ |   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||||
|     HermOp(in,out); |     out.Checkerboard() = in.Checkerboard(); | ||||||
|  |     MpcDagMpc(in,out); | ||||||
|     ComplexD dot= innerProduct(in,out);  |     ComplexD dot= innerProduct(in,out);  | ||||||
|     n1=real(dot); |     n1=real(dot); | ||||||
|     n2=norm2(out); |     n2=norm2(out); | ||||||
| @@ -281,16 +276,6 @@ template<class Matrix,class Field> | |||||||
|       axpy(out,-1.0,tmp,out); |       axpy(out,-1.0,tmp,out); | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| // Mpc MpcDag system presented as the HermOp |  | ||||||
| template<class Matrix,class Field> |  | ||||||
| class SchurDiagMooeeDagOperator :  public SchurDiagMooeeOperator<Matrix,Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void HermOp(const Field &in, Field &out){ |  | ||||||
|     out.Checkerboard() = in.Checkerboard(); |  | ||||||
|     this->MpcMpcDag(in,out); |  | ||||||
|   } |  | ||||||
|   SchurDiagMooeeDagOperator (Matrix &Mat): SchurDiagMooeeOperator<Matrix,Field>(Mat){}; |  | ||||||
| }; |  | ||||||
| template<class Matrix,class Field> | template<class Matrix,class Field> | ||||||
|   class SchurDiagOneOperator :  public SchurOperatorBase<Field> { |   class SchurDiagOneOperator :  public SchurOperatorBase<Field> { | ||||||
|  protected: |  protected: | ||||||
| @@ -545,16 +530,6 @@ public: | |||||||
| template<class Field> class LinearFunction { | template<class Field> class LinearFunction { | ||||||
| public: | public: | ||||||
|   virtual void operator() (const Field &in, Field &out) = 0; |   virtual void operator() (const Field &in, Field &out) = 0; | ||||||
|  |  | ||||||
|   virtual void operator() (const std::vector<Field> &in, std::vector<Field> &out) |  | ||||||
|   { |  | ||||||
|     assert(in.size() == out.size()); |  | ||||||
|  |  | ||||||
|     for (unsigned int i = 0; i < in.size(); ++i) |  | ||||||
|     { |  | ||||||
|       (*this)(in[i], out[i]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | ||||||
|   | |||||||
| @@ -292,7 +292,6 @@ public: | |||||||
| template<class Field> | template<class Field> | ||||||
| class ChebyshevLanczos : public Chebyshev<Field> { | class ChebyshevLanczos : public Chebyshev<Field> { | ||||||
| private: | private: | ||||||
|  |  | ||||||
|   std::vector<RealD> Coeffs; |   std::vector<RealD> Coeffs; | ||||||
|   int order; |   int order; | ||||||
|   RealD alpha; |   RealD alpha; | ||||||
|   | |||||||
| @@ -102,7 +102,7 @@ public: | |||||||
|     // Check if guess is really REALLY good :) |     // Check if guess is really REALLY good :) | ||||||
|     if (cp <= rsq) { |     if (cp <= rsq) { | ||||||
|       TrueResidual = std::sqrt(a/ssq); |       TrueResidual = std::sqrt(a/ssq); | ||||||
|       std::cout << GridLogMessage << "ConjugateGradient guess is converged already "<<TrueResidual<< " tol "<< Tolerance<< std::endl; |       std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl; | ||||||
|       IterationsToComplete = 0;	 |       IterationsToComplete = 0;	 | ||||||
|       return; |       return; | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -48,29 +48,19 @@ NAMESPACE_BEGIN(Grid); | |||||||
|     Integer TotalInnerIterations; //Number of inner CG iterations |     Integer TotalInnerIterations; //Number of inner CG iterations | ||||||
|     Integer TotalOuterIterations; //Number of restarts |     Integer TotalOuterIterations; //Number of restarts | ||||||
|     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step |     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step | ||||||
|     RealD TrueResidual; |  | ||||||
|  |  | ||||||
|     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess |     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess | ||||||
|     LinearFunction<FieldF> *guesser; |     LinearFunction<FieldF> *guesser; | ||||||
|      |      | ||||||
|     MixedPrecisionConjugateGradient(RealD Tol, |     MixedPrecisionConjugateGradient(RealD tol,  | ||||||
| 				    Integer maxinnerit,  |  | ||||||
| 				    Integer maxouterit,  |  | ||||||
| 				    GridBase* _sp_grid,  |  | ||||||
| 				    LinearOperatorBase<FieldF> &_Linop_f,  |  | ||||||
| 				    LinearOperatorBase<FieldD> &_Linop_d) : |  | ||||||
|       MixedPrecisionConjugateGradient(Tol, Tol, maxinnerit, maxouterit, _sp_grid, _Linop_f, _Linop_d) {}; |  | ||||||
|  |  | ||||||
|     MixedPrecisionConjugateGradient(RealD Tol, |  | ||||||
| 				    RealD InnerTol, |  | ||||||
| 				    Integer maxinnerit,  | 				    Integer maxinnerit,  | ||||||
| 				    Integer maxouterit,  | 				    Integer maxouterit,  | ||||||
| 				    GridBase* _sp_grid,  | 				    GridBase* _sp_grid,  | ||||||
| 				    LinearOperatorBase<FieldF> &_Linop_f,  | 				    LinearOperatorBase<FieldF> &_Linop_f,  | ||||||
| 				    LinearOperatorBase<FieldD> &_Linop_d) : | 				    LinearOperatorBase<FieldD> &_Linop_d) : | ||||||
|       Linop_f(_Linop_f), Linop_d(_Linop_d), |       Linop_f(_Linop_f), Linop_d(_Linop_d), | ||||||
|       Tolerance(Tol), InnerTolerance(InnerTol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), |       Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), | ||||||
|       OuterLoopNormMult(100.), guesser(NULL){ assert(InnerTol < 1.0e-1);}; |       OuterLoopNormMult(100.), guesser(NULL){ }; | ||||||
|  |  | ||||||
|     void useGuesser(LinearFunction<FieldF> &g){ |     void useGuesser(LinearFunction<FieldF> &g){ | ||||||
|       guesser = &g; |       guesser = &g; | ||||||
| @@ -89,11 +79,6 @@ NAMESPACE_BEGIN(Grid); | |||||||
|     RealD stop = src_norm * Tolerance*Tolerance; |     RealD stop = src_norm * Tolerance*Tolerance; | ||||||
|  |  | ||||||
|     GridBase* DoublePrecGrid = src_d_in.Grid(); |     GridBase* DoublePrecGrid = src_d_in.Grid(); | ||||||
|  |  | ||||||
|     //Generate precision change workspaces |  | ||||||
|     precisionChangeWorkspace wk_dp_from_sp(DoublePrecGrid, SinglePrecGrid); |  | ||||||
|     precisionChangeWorkspace wk_sp_from_dp(SinglePrecGrid, DoublePrecGrid); |  | ||||||
|  |  | ||||||
|     FieldD tmp_d(DoublePrecGrid); |     FieldD tmp_d(DoublePrecGrid); | ||||||
|     tmp_d.Checkerboard() = cb; |     tmp_d.Checkerboard() = cb; | ||||||
|      |      | ||||||
| @@ -134,7 +119,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
|       while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ?? |       while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ?? | ||||||
|  |  | ||||||
|       PrecChangeTimer.Start(); |       PrecChangeTimer.Start(); | ||||||
|       precisionChange(src_f, src_d, wk_sp_from_dp); |       precisionChange(src_f, src_d); | ||||||
|       PrecChangeTimer.Stop(); |       PrecChangeTimer.Stop(); | ||||||
|        |        | ||||||
|       sol_f = Zero(); |       sol_f = Zero(); | ||||||
| @@ -152,7 +137,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
|        |        | ||||||
|       //Convert sol back to double and add to double prec solution |       //Convert sol back to double and add to double prec solution | ||||||
|       PrecChangeTimer.Start(); |       PrecChangeTimer.Start(); | ||||||
|       precisionChange(tmp_d, sol_f, wk_dp_from_sp); |       precisionChange(tmp_d, sol_f); | ||||||
|       PrecChangeTimer.Stop(); |       PrecChangeTimer.Stop(); | ||||||
|        |        | ||||||
|       axpy(sol_d, 1.0, tmp_d, sol_d); |       axpy(sol_d, 1.0, tmp_d, sol_d); | ||||||
| @@ -164,7 +149,6 @@ NAMESPACE_BEGIN(Grid); | |||||||
|     ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations); |     ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations); | ||||||
|     CG_d(Linop_d, src_d_in, sol_d); |     CG_d(Linop_d, src_d_in, sol_d); | ||||||
|     TotalFinalStepIterations = CG_d.IterationsToComplete; |     TotalFinalStepIterations = CG_d.IterationsToComplete; | ||||||
|     TrueResidual = CG_d.TrueResidual; |  | ||||||
|  |  | ||||||
|     TotalTimer.Stop(); |     TotalTimer.Stop(); | ||||||
|     std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl; |     std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl; | ||||||
|   | |||||||
| @@ -52,7 +52,7 @@ public: | |||||||
|   MultiShiftFunction shifts; |   MultiShiftFunction shifts; | ||||||
|   std::vector<RealD> TrueResidualShift; |   std::vector<RealD> TrueResidualShift; | ||||||
|  |  | ||||||
|   ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) :  |   ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :  | ||||||
|     MaxIterations(maxit), |     MaxIterations(maxit), | ||||||
|     shifts(_shifts) |     shifts(_shifts) | ||||||
|   {  |   {  | ||||||
| @@ -183,9 +183,6 @@ public: | |||||||
|       axpby(psi[s],0.,-bs[s]*alpha[s],src,src); |       axpby(psi[s],0.,-bs[s]*alpha[s],src,src); | ||||||
|     } |     } | ||||||
|    |    | ||||||
|     std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl; |  | ||||||
|      |  | ||||||
|    |  | ||||||
|   /////////////////////////////////////// |   /////////////////////////////////////// | ||||||
|   // Timers |   // Timers | ||||||
|   /////////////////////////////////////// |   /////////////////////////////////////// | ||||||
|   | |||||||
| @@ -1,411 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2015 |  | ||||||
|  |  | ||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
| Author: Christopher Kelly <ckelly@bnl.gov> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H |  | ||||||
| #define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| //CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.  |  | ||||||
| //The residual is stored in single precision, but the search directions and solution are stored in double precision.  |  | ||||||
| //Every update_freq iterations the residual is corrected in double precision.  |  | ||||||
|      |  | ||||||
| //For safety the a final regular CG is applied to clean up if necessary |  | ||||||
|  |  | ||||||
| //Linop to add shift to input linop, used in cleanup CG |  | ||||||
| namespace ConjugateGradientMultiShiftMixedPrecSupport{ |  | ||||||
| template<typename Field> |  | ||||||
| class ShiftedLinop: public LinearOperatorBase<Field>{ |  | ||||||
| public: |  | ||||||
|   LinearOperatorBase<Field> &linop_base; |  | ||||||
|   RealD shift; |  | ||||||
|  |  | ||||||
|   ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){} |  | ||||||
|  |  | ||||||
|   void OpDiag (const Field &in, Field &out){ assert(0); } |  | ||||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); } |  | ||||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } |  | ||||||
|    |  | ||||||
|   void Op     (const Field &in, Field &out){ assert(0); } |  | ||||||
|   void AdjOp  (const Field &in, Field &out){ assert(0); } |  | ||||||
|  |  | ||||||
|   void HermOp(const Field &in, Field &out){ |  | ||||||
|     linop_base.HermOp(in, out); |  | ||||||
|     axpy(out, shift, in, out); |  | ||||||
|   }     |  | ||||||
|  |  | ||||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ |  | ||||||
|     HermOp(in,out); |  | ||||||
|     ComplexD dot = innerProduct(in,out); |  | ||||||
|     n1=real(dot); |  | ||||||
|     n2=norm2(out); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class FieldD, class FieldF, |  | ||||||
| 	 typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, |  | ||||||
| 	 typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>  |  | ||||||
| class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>, |  | ||||||
| 					     public OperatorFunction<FieldD> |  | ||||||
| { |  | ||||||
| public:                                                 |  | ||||||
|  |  | ||||||
|   using OperatorFunction<FieldD>::operator(); |  | ||||||
|  |  | ||||||
|   RealD   Tolerance; |  | ||||||
|   Integer MaxIterations; |  | ||||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion |  | ||||||
|   std::vector<int> IterationsToCompleteShift;  // Iterations for this shift |  | ||||||
|   int verbose; |  | ||||||
|   MultiShiftFunction shifts; |  | ||||||
|   std::vector<RealD> TrueResidualShift; |  | ||||||
|  |  | ||||||
|   int ReliableUpdateFreq; //number of iterations between reliable updates |  | ||||||
|  |  | ||||||
|   GridBase* SinglePrecGrid; //Grid for single-precision fields |  | ||||||
|   LinearOperatorBase<FieldF> &Linop_f; //single precision |  | ||||||
|  |  | ||||||
|   ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts, |  | ||||||
| 				       GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f, |  | ||||||
| 				       int _ReliableUpdateFreq |  | ||||||
| 				       ) :  |  | ||||||
|     MaxIterations(maxit),  shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq) |  | ||||||
|   {  |  | ||||||
|     verbose=1; |  | ||||||
|     IterationsToCompleteShift.resize(_shifts.order); |  | ||||||
|     TrueResidualShift.resize(_shifts.order); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi) |  | ||||||
|   { |  | ||||||
|     GridBase *grid = src.Grid(); |  | ||||||
|     int nshift = shifts.order; |  | ||||||
|     std::vector<FieldD> results(nshift,grid); |  | ||||||
|     (*this)(Linop,src,results,psi); |  | ||||||
|   } |  | ||||||
|   void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi) |  | ||||||
|   { |  | ||||||
|     int nshift = shifts.order; |  | ||||||
|  |  | ||||||
|     (*this)(Linop,src,results); |  | ||||||
|    |  | ||||||
|     psi = shifts.norm*src; |  | ||||||
|     for(int i=0;i<nshift;i++){ |  | ||||||
|       psi = psi + shifts.residues[i]*results[i]; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d) |  | ||||||
|   {  |  | ||||||
|     GridBase *DoublePrecGrid = src_d.Grid(); |  | ||||||
|     precisionChangeWorkspace wk_f_from_d(SinglePrecGrid, DoublePrecGrid); |  | ||||||
|     precisionChangeWorkspace wk_d_from_f(DoublePrecGrid, SinglePrecGrid); |  | ||||||
|  |  | ||||||
|     //////////////////////////////////////////////////////////////////////// |  | ||||||
|     // Convenience references to the info stored in "MultiShiftFunction" |  | ||||||
|     //////////////////////////////////////////////////////////////////////// |  | ||||||
|     int nshift = shifts.order; |  | ||||||
|  |  | ||||||
|     std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts" |  | ||||||
|     std::vector<RealD> &mresidual(shifts.tolerances); |  | ||||||
|     std::vector<RealD> alpha(nshift,1.0); |  | ||||||
|  |  | ||||||
|     //Double precision search directions |  | ||||||
|     FieldD p_d(DoublePrecGrid); |  | ||||||
|     std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision) |  | ||||||
|  |  | ||||||
|     FieldD tmp_d(DoublePrecGrid); |  | ||||||
|     FieldD r_d(DoublePrecGrid); |  | ||||||
|     FieldD mmp_d(DoublePrecGrid); |  | ||||||
|  |  | ||||||
|     assert(psi_d.size()==nshift); |  | ||||||
|     assert(mass.size()==nshift); |  | ||||||
|     assert(mresidual.size()==nshift); |  | ||||||
|    |  | ||||||
|     // dynamic sized arrays on stack; 2d is a pain with vector |  | ||||||
|     RealD  bs[nshift]; |  | ||||||
|     RealD  rsq[nshift]; |  | ||||||
|     RealD  z[nshift][2]; |  | ||||||
|     int     converged[nshift]; |  | ||||||
|    |  | ||||||
|     const int       primary =0; |  | ||||||
|    |  | ||||||
|     //Primary shift fields CG iteration |  | ||||||
|     RealD a,b,c,d; |  | ||||||
|     RealD cp,bp,qq; //prev |  | ||||||
|    |  | ||||||
|     // Matrix mult fields |  | ||||||
|     FieldF r_f(SinglePrecGrid); |  | ||||||
|     FieldF p_f(SinglePrecGrid); |  | ||||||
|     FieldF tmp_f(SinglePrecGrid); |  | ||||||
|     FieldF mmp_f(SinglePrecGrid); |  | ||||||
|     FieldF src_f(SinglePrecGrid); |  | ||||||
|     precisionChange(src_f, src_d, wk_f_from_d); |  | ||||||
|  |  | ||||||
|     // Check lightest mass |  | ||||||
|     for(int s=0;s<nshift;s++){ |  | ||||||
|       assert( mass[s]>= mass[primary] ); |  | ||||||
|       converged[s]=0; |  | ||||||
|     } |  | ||||||
|    |  | ||||||
|     // Wire guess to zero |  | ||||||
|     // Residuals "r" are src |  | ||||||
|     // First search direction "p" is also src |  | ||||||
|     cp = norm2(src_d); |  | ||||||
|  |  | ||||||
|     // Handle trivial case of zero src. |  | ||||||
|     if( cp == 0. ){ |  | ||||||
|       for(int s=0;s<nshift;s++){ |  | ||||||
| 	psi_d[s] = Zero(); |  | ||||||
| 	IterationsToCompleteShift[s] = 1; |  | ||||||
| 	TrueResidualShift[s] = 0.; |  | ||||||
|       } |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for(int s=0;s<nshift;s++){ |  | ||||||
|       rsq[s] = cp * mresidual[s] * mresidual[s]; |  | ||||||
|       std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl; |  | ||||||
|       ps_d[s] = src_d; |  | ||||||
|     } |  | ||||||
|     // r and p for primary |  | ||||||
|     r_f=src_f; //residual maintained in single |  | ||||||
|     p_f=src_f; |  | ||||||
|     p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys |  | ||||||
|    |  | ||||||
|     //MdagM+m[0] |  | ||||||
|     Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp) |  | ||||||
|     axpy(mmp_f,mass[0],p_f,mmp_f); |  | ||||||
|     RealD rn = norm2(p_f); |  | ||||||
|     d += rn*mass[0]; |  | ||||||
|  |  | ||||||
|     b = -cp /d; |  | ||||||
|    |  | ||||||
|     // Set up the various shift variables |  | ||||||
|     int       iz=0; |  | ||||||
|     z[0][1-iz] = 1.0; |  | ||||||
|     z[0][iz]   = 1.0; |  | ||||||
|     bs[0]      = b; |  | ||||||
|     for(int s=1;s<nshift;s++){ |  | ||||||
|       z[s][1-iz] = 1.0; |  | ||||||
|       z[s][iz]   = 1.0/( 1.0 - b*(mass[s]-mass[0])); |  | ||||||
|       bs[s]      = b*z[s][iz];  |  | ||||||
|     } |  | ||||||
|    |  | ||||||
|     // r += b[0] A.p[0] |  | ||||||
|     // c= norm(r) |  | ||||||
|     c=axpy_norm(r_f,b,mmp_f,r_f); |  | ||||||
|    |  | ||||||
|     for(int s=0;s<nshift;s++) { |  | ||||||
|       axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d); |  | ||||||
|     } |  | ||||||
|    |  | ||||||
|     /////////////////////////////////////// |  | ||||||
|     // Timers |  | ||||||
|     /////////////////////////////////////// |  | ||||||
|     GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer; |  | ||||||
|  |  | ||||||
|     SolverTimer.Start(); |  | ||||||
|    |  | ||||||
|     // Iteration loop |  | ||||||
|     int k; |  | ||||||
|    |  | ||||||
|     for (k=1;k<=MaxIterations;k++){     |  | ||||||
|       a = c /cp; |  | ||||||
|  |  | ||||||
|       //Update double precision search direction by residual |  | ||||||
|       PrecChangeTimer.Start(); |  | ||||||
|       precisionChange(r_d, r_f, wk_d_from_f); |  | ||||||
|       PrecChangeTimer.Stop(); |  | ||||||
|  |  | ||||||
|       AXPYTimer.Start(); |  | ||||||
|       axpy(p_d,a,p_d,r_d);  |  | ||||||
|  |  | ||||||
|       for(int s=0;s<nshift;s++){ |  | ||||||
| 	if ( ! converged[s] ) {  |  | ||||||
| 	  if (s==0){ |  | ||||||
| 	    axpy(ps_d[s],a,ps_d[s],r_d); |  | ||||||
| 	  } else{ |  | ||||||
| 	    RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b); |  | ||||||
| 	    axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]); |  | ||||||
| 	  } |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|       AXPYTimer.Stop(); |  | ||||||
|  |  | ||||||
|       PrecChangeTimer.Start(); |  | ||||||
|       precisionChange(p_f, p_d, wk_f_from_d); //get back single prec search direction for linop |  | ||||||
|       PrecChangeTimer.Stop(); |  | ||||||
|  |  | ||||||
|       cp=c; |  | ||||||
|       MatrixTimer.Start();   |  | ||||||
|       Linop_f.HermOp(p_f,mmp_f);  |  | ||||||
|       d=real(innerProduct(p_f,mmp_f));     |  | ||||||
|       MatrixTimer.Stop();   |  | ||||||
|  |  | ||||||
|       AXPYTimer.Start(); |  | ||||||
|       axpy(mmp_f,mass[0],p_f,mmp_f); |  | ||||||
|       AXPYTimer.Stop(); |  | ||||||
|       RealD rn = norm2(p_f); |  | ||||||
|       d += rn*mass[0]; |  | ||||||
|      |  | ||||||
|       bp=b; |  | ||||||
|       b=-cp/d; |  | ||||||
|      |  | ||||||
|       // Toggle the recurrence history |  | ||||||
|       bs[0] = b; |  | ||||||
|       iz = 1-iz; |  | ||||||
|       ShiftTimer.Start(); |  | ||||||
|       for(int s=1;s<nshift;s++){ |  | ||||||
| 	if((!converged[s])){ |  | ||||||
| 	  RealD z0 = z[s][1-iz]; |  | ||||||
| 	  RealD z1 = z[s][iz]; |  | ||||||
| 	  z[s][iz] = z0*z1*bp |  | ||||||
| 	    / (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));  |  | ||||||
| 	  bs[s] = b*z[s][iz]/z0; // NB sign  rel to Mike |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|       ShiftTimer.Stop(); |  | ||||||
|  |  | ||||||
|       //Update double precision solutions |  | ||||||
|       AXPYTimer.Start(); |  | ||||||
|       for(int s=0;s<nshift;s++){ |  | ||||||
| 	int ss = s; |  | ||||||
| 	if( (!converged[s]) ) {  |  | ||||||
| 	  axpy(psi_d[ss],-bs[s]*alpha[s],ps_d[s],psi_d[ss]); |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       //Perform reliable update if necessary; otherwise update residual from single-prec mmp |  | ||||||
|       RealD c_f = axpy_norm(r_f,b,mmp_f,r_f); |  | ||||||
|       AXPYTimer.Stop(); |  | ||||||
|  |  | ||||||
|       c = c_f; |  | ||||||
|  |  | ||||||
|       if(k % ReliableUpdateFreq == 0){ |  | ||||||
| 	//Replace r with true residual |  | ||||||
| 	MatrixTimer.Start();   |  | ||||||
| 	Linop_d.HermOp(psi_d[0],mmp_d);  |  | ||||||
| 	MatrixTimer.Stop();   |  | ||||||
|  |  | ||||||
| 	AXPYTimer.Start(); |  | ||||||
| 	axpy(mmp_d,mass[0],psi_d[0],mmp_d); |  | ||||||
|  |  | ||||||
| 	RealD c_d = axpy_norm(r_d, -1.0, mmp_d, src_d); |  | ||||||
| 	AXPYTimer.Stop(); |  | ||||||
|  |  | ||||||
| 	std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_f <<" with |r|^2 = "<<c_d<<std::endl; |  | ||||||
| 	 |  | ||||||
| 	PrecChangeTimer.Start(); |  | ||||||
| 	precisionChange(r_f, r_d, wk_f_from_d); |  | ||||||
| 	PrecChangeTimer.Stop(); |  | ||||||
| 	c = c_d; |  | ||||||
|       } |  | ||||||
|      |  | ||||||
|       // Convergence checks |  | ||||||
|       int all_converged = 1; |  | ||||||
|       for(int s=0;s<nshift;s++){ |  | ||||||
|        |  | ||||||
| 	if ( (!converged[s]) ){ |  | ||||||
| 	  IterationsToCompleteShift[s] = k; |  | ||||||
| 	 |  | ||||||
| 	  RealD css  = c * z[s][iz]* z[s][iz]; |  | ||||||
| 	 |  | ||||||
| 	  if(css<rsq[s]){ |  | ||||||
| 	    if ( ! converged[s] ) |  | ||||||
| 	      std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl; |  | ||||||
| 	    converged[s]=1; |  | ||||||
| 	  } else { |  | ||||||
| 	    all_converged=0; |  | ||||||
| 	  } |  | ||||||
|  |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       if ( all_converged ){ |  | ||||||
|  |  | ||||||
| 	SolverTimer.Stop(); |  | ||||||
| 	std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl; |  | ||||||
| 	std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl; |  | ||||||
|        |  | ||||||
| 	// Check answers  |  | ||||||
| 	for(int s=0; s < nshift; s++) {  |  | ||||||
| 	  Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq); |  | ||||||
| 	  axpy(tmp_d,mass[s],psi_d[s],mmp_d); |  | ||||||
| 	  axpy(r_d,-alpha[s],src_d,tmp_d); |  | ||||||
| 	  RealD rn = norm2(r_d); |  | ||||||
| 	  RealD cn = norm2(src_d); |  | ||||||
| 	  TrueResidualShift[s] = std::sqrt(rn/cn); |  | ||||||
| 	  std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl; |  | ||||||
|  |  | ||||||
| 	  //If we have not reached the desired tolerance, do a (mixed precision) CG cleanup |  | ||||||
| 	  if(rn >= rsq[s]){ |  | ||||||
| 	    CleanupTimer.Start(); |  | ||||||
| 	    std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl; |  | ||||||
|  |  | ||||||
| 	    //Setup linear operators for final cleanup |  | ||||||
| 	    ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]); |  | ||||||
| 	    ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]); |  | ||||||
| 					        |  | ||||||
| 	    MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);  |  | ||||||
| 	    cg(src_d, psi_d[s]); |  | ||||||
| 	     |  | ||||||
| 	    TrueResidualShift[s] = cg.TrueResidual; |  | ||||||
| 	    CleanupTimer.Stop(); |  | ||||||
| 	  } |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl; |  | ||||||
| 	std::cout << GridLogMessage << "\tSolver    " << SolverTimer.Elapsed()     <<std::endl; |  | ||||||
| 	std::cout << GridLogMessage << "\t\tAXPY    " << AXPYTimer.Elapsed()     <<std::endl; |  | ||||||
| 	std::cout << GridLogMessage << "\t\tMatrix    " << MatrixTimer.Elapsed()     <<std::endl; |  | ||||||
| 	std::cout << GridLogMessage << "\t\tShift    " << ShiftTimer.Elapsed()     <<std::endl; |  | ||||||
| 	std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed()     <<std::endl; |  | ||||||
| 	std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed()     <<std::endl; |  | ||||||
| 	std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl; |  | ||||||
|  |  | ||||||
| 	IterationsToComplete = k;	 |  | ||||||
|  |  | ||||||
| 	return; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|     |  | ||||||
|     } |  | ||||||
|     // ugly hack |  | ||||||
|     std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl; |  | ||||||
|     //  assert(0); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| }; |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| #endif |  | ||||||
| @@ -54,23 +54,15 @@ class DeflatedGuesser: public LinearFunction<Field> { | |||||||
| private: | private: | ||||||
|   const std::vector<Field> &evec; |   const std::vector<Field> &evec; | ||||||
|   const std::vector<RealD> &eval; |   const std::vector<RealD> &eval; | ||||||
|   const unsigned int       N; |  | ||||||
|  |  | ||||||
| public: | public: | ||||||
|  |  | ||||||
|   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) |   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; | ||||||
|   : DeflatedGuesser(_evec, _eval, _evec.size()) |  | ||||||
|   {} |  | ||||||
|  |  | ||||||
|   DeflatedGuesser(const std::vector<Field> & _evec, const std::vector<RealD> & _eval, const unsigned int _N) |  | ||||||
|   : evec(_evec), eval(_eval), N(_N) |  | ||||||
|   { |  | ||||||
|     assert(evec.size()==eval.size()); |  | ||||||
|     assert(N <= evec.size()); |  | ||||||
|   }  |  | ||||||
|  |  | ||||||
|   virtual void operator()(const Field &src,Field &guess) { |   virtual void operator()(const Field &src,Field &guess) { | ||||||
|     guess = Zero(); |     guess = Zero(); | ||||||
|  |     assert(evec.size()==eval.size()); | ||||||
|  |     auto N = evec.size(); | ||||||
|     for (int i=0;i<N;i++) { |     for (int i=0;i<N;i++) { | ||||||
|       const Field& tmp = evec[i]; |       const Field& tmp = evec[i]; | ||||||
|       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); |       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); | ||||||
|   | |||||||
| @@ -40,7 +40,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    *        (-MoeMee^{-1}   1 )    |    *        (-MoeMee^{-1}   1 )    | ||||||
|    * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} ) |    * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} ) | ||||||
|    *           ( 0       1                    ) |    *           ( 0       1                    ) | ||||||
|    * L^{-dag}= ( 1      -Mee^{-dag} Moe^{dag} ) |    * L^{-d}  = ( 1      -Mee^{-dag} Moe^{dag} ) | ||||||
|    *           ( 0       1                    ) |    *           ( 0       1                    ) | ||||||
|    * |    * | ||||||
|    * U^-1 = (1   -Mee^{-1} Meo) |    * U^-1 = (1   -Mee^{-1} Meo) | ||||||
| @@ -82,8 +82,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o |    * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o | ||||||
|    *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) |    *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) | ||||||
|    *                              psi_o = M_oo^-1 phi_o |    *                              psi_o = M_oo^-1 phi_o | ||||||
|    * |    * TODO: Deflation  | ||||||
|    * |  | ||||||
|    */ |    */ | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| @@ -98,7 +97,6 @@ namespace Grid { | |||||||
|   protected: |   protected: | ||||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; |     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||||
|     OperatorFunction<Field> & _HermitianRBSolver; |     OperatorFunction<Field> & _HermitianRBSolver; | ||||||
|      |  | ||||||
|     int CBfactorise; |     int CBfactorise; | ||||||
|     bool subGuess; |     bool subGuess; | ||||||
|     bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver |     bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver | ||||||
| @@ -134,31 +132,6 @@ namespace Grid { | |||||||
|       (*this)(_Matrix,in,out,guess); |       (*this)(_Matrix,in,out,guess); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void RedBlackSource(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &src_o)  |  | ||||||
|     { |  | ||||||
|       GridBase *grid = _Matrix.RedBlackGrid(); |  | ||||||
|       Field tmp(grid); |  | ||||||
|       int nblock = in.size(); |  | ||||||
|       for(int b=0;b<nblock;b++){ |  | ||||||
| 	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     // James can write his own deflated guesser |  | ||||||
|     // with optimised code for the inner products |  | ||||||
|     //    RedBlackSolveSplitGrid(); |  | ||||||
|     //    RedBlackSolve(_Matrix,src_o,sol_o);  |  | ||||||
|  |  | ||||||
|     void RedBlackSolution(Matrix &_Matrix, const std::vector<Field> &in, const std::vector<Field> &sol_o, std::vector<Field> &out) |  | ||||||
|     { |  | ||||||
|       GridBase *grid = _Matrix.RedBlackGrid(); |  | ||||||
|       Field tmp(grid); |  | ||||||
|       int nblock = in.size(); |  | ||||||
|       for(int b=0;b<nblock;b++) { |  | ||||||
| 	pickCheckerboard(Even,tmp,in[b]); |  | ||||||
| 	RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     template<class Guesser> |     template<class Guesser> | ||||||
|     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)  |     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)  | ||||||
|     { |     { | ||||||
| @@ -177,27 +150,22 @@ namespace Grid { | |||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Prepare RedBlack source |       // Prepare RedBlack source | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       RedBlackSource(_Matrix,in,src_o); |       for(int b=0;b<nblock;b++){ | ||||||
| 	//      for(int b=0;b<nblock;b++){ | 	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||||
| 	//	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); |       } | ||||||
| 	//      } |  | ||||||
|        |  | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       // Make the guesses |       // Make the guesses | ||||||
|       //////////////////////////////////////////////// |       //////////////////////////////////////////////// | ||||||
|       if ( subGuess ) guess_save.resize(nblock,grid); |       if ( subGuess ) guess_save.resize(nblock,grid); | ||||||
|  |  | ||||||
|        |  | ||||||
|       if(useSolnAsInitGuess) { |  | ||||||
|       for(int b=0;b<nblock;b++){ |       for(int b=0;b<nblock;b++){ | ||||||
|  |         if(useSolnAsInitGuess) { | ||||||
|           pickCheckerboard(Odd, sol_o[b], out[b]); |           pickCheckerboard(Odd, sol_o[b], out[b]); | ||||||
|         } |  | ||||||
|         } else { |         } else { | ||||||
|         guess(src_o, sol_o);  |           guess(src_o[b],sol_o[b]);  | ||||||
|         } |         } | ||||||
|  |  | ||||||
| 	if ( subGuess ) {  | 	if ( subGuess ) {  | ||||||
|         for(int b=0;b<nblock;b++){ |  | ||||||
| 	  guess_save[b] = sol_o[b]; | 	  guess_save[b] = sol_o[b]; | ||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
| @@ -222,18 +190,11 @@ namespace Grid { | |||||||
| 	// Check unprec residual if possible | 	// Check unprec residual if possible | ||||||
| 	///////////////////////////////////////////////// | 	///////////////////////////////////////////////// | ||||||
| 	if ( ! subGuess ) { | 	if ( ! subGuess ) { | ||||||
|  | 	  _Matrix.M(out[b],resid);  | ||||||
| 	  if ( this->adjoint() ) _Matrix.Mdag(out[b],resid);  |  | ||||||
| 	  else                   _Matrix.M(out[b],resid);  |  | ||||||
|  |  | ||||||
| 	  resid = resid-in[b]; | 	  resid = resid-in[b]; | ||||||
| 	  RealD ns = norm2(in[b]); | 	  RealD ns = norm2(in[b]); | ||||||
| 	  RealD nr = norm2(resid); | 	  RealD nr = norm2(resid); | ||||||
| 	 | 	 | ||||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint "<< this->adjoint() << std::endl; |  | ||||||
| 	  if ( this->adjoint() )  |  | ||||||
| 	    std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; |  | ||||||
| 	  else                    |  | ||||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | 	  std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | ||||||
| 	} else { | 	} else { | ||||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl; | 	  std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl; | ||||||
| @@ -288,21 +249,12 @@ namespace Grid { | |||||||
|  |  | ||||||
|       // Verify the unprec residual |       // Verify the unprec residual | ||||||
|       if ( ! subGuess ) { |       if ( ! subGuess ) { | ||||||
|  |         _Matrix.M(out,resid);  | ||||||
| 	std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint "<< this->adjoint() << std::endl; |  | ||||||
| 	 |  | ||||||
| 	if ( this->adjoint() ) _Matrix.Mdag(out,resid);  |  | ||||||
| 	else                   _Matrix.M(out,resid);  |  | ||||||
|  |  | ||||||
|         resid = resid-in; |         resid = resid-in; | ||||||
|         RealD ns = norm2(in); |         RealD ns = norm2(in); | ||||||
|         RealD nr = norm2(resid); |         RealD nr = norm2(resid); | ||||||
|  |  | ||||||
| 	  if ( this->adjoint() )  |         std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl; | ||||||
| 	    std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint solver true unprec resid "<<std::sqrt(nr/ns) << std::endl; |  | ||||||
| 	  else                    |  | ||||||
| 	    std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid "<<std::sqrt(nr/ns) << std::endl; |  | ||||||
|  |  | ||||||
|       } else { |       } else { | ||||||
|         std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl; |         std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl; | ||||||
|       } |       } | ||||||
| @@ -311,7 +263,6 @@ namespace Grid { | |||||||
|     ///////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////// | ||||||
|     // Override in derived.  |     // Override in derived.  | ||||||
|     ///////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////// | ||||||
|     virtual bool adjoint(void) { return false; } |  | ||||||
|     virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0; |     virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0; | ||||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0; |     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0; | ||||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0; |     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0; | ||||||
| @@ -665,127 +616,6 @@ namespace Grid { | |||||||
|         this->_HermitianRBSolver(_OpEO, src_o, sol_o);  |         this->_HermitianRBSolver(_OpEO, src_o, sol_o);  | ||||||
|       } |       } | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   /* |  | ||||||
|    * Red black Schur decomposition |  | ||||||
|    * |  | ||||||
|    *  M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo) |  | ||||||
|    *      (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         ) |  | ||||||
|    *                =         L                     D                     U |  | ||||||
|    * |  | ||||||
|    * L^-1 = (1              0 ) |  | ||||||
|    *        (-MoeMee^{-1}   1 )    |  | ||||||
|    * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} ) |  | ||||||
|    *           ( 0       1                    ) |  | ||||||
|    * |  | ||||||
|    * U^-1 = (1   -Mee^{-1} Meo) |  | ||||||
|    *        (0    1           ) |  | ||||||
|    * U^{dag} = ( 1                 0) |  | ||||||
|    *           (Meo^dag Mee^{-dag} 1) |  | ||||||
|    * U^{-dag} = (  1                 0) |  | ||||||
|    *            (-Meo^dag Mee^{-dag} 1) |  | ||||||
|    * |  | ||||||
|    * |  | ||||||
|    *********************** |  | ||||||
|    *     M^dag psi = eta |  | ||||||
|    *********************** |  | ||||||
|    * |  | ||||||
|    * Really for Mobius: (Wilson - easier to just use gamma 5 hermiticity) |  | ||||||
|    * |  | ||||||
|    *    Mdag psi     =         Udag  Ddag  Ldag psi = eta |  | ||||||
|    * |  | ||||||
|    * U^{-dag} = (  1                 0) |  | ||||||
|    *            (-Meo^dag Mee^{-dag} 1) |  | ||||||
|    * |  | ||||||
|    * |  | ||||||
|    * i)                D^dag phi =  (U^{-dag}  eta) |  | ||||||
|    *                        eta'_e = eta_e |  | ||||||
|    *                        eta'_o = (eta_o - Meo^dag Mee^{-dag} eta_e) |  | ||||||
|    *  |  | ||||||
|    *      phi_o = D_oo^-dag eta'_o = D_oo^-dag (eta_o - Meo^dag Mee^{-dag} eta_e) |  | ||||||
|    * |  | ||||||
|    *      phi_e = D_ee^-dag eta'_e = D_ee^-dag eta_e |  | ||||||
|    *  |  | ||||||
|    * Solve:  |  | ||||||
|    * |  | ||||||
|    *      D_oo D_oo^dag phi_o = D_oo (eta_o - Meo^dag Mee^{-dag} eta_e) |  | ||||||
|    * |  | ||||||
|    * ii)  |  | ||||||
|    *      phi = L^dag psi => psi = L^-dag phi.  |  | ||||||
|    * |  | ||||||
|    * L^{-dag} = ( 1      -Mee^{-dag} Moe^{dag} ) |  | ||||||
|    *            ( 0       1                    ) |  | ||||||
|    * |  | ||||||
|    *   => sol_e = M_ee^-dag * ( src_e - Moe^dag phi_o )... |  | ||||||
|    *   => sol_o = phi_o |  | ||||||
|    */ |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Site diagonal has Mooee on it, but solve the Adjoint system |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   template<class Field> class SchurRedBlackDiagMooeeDagSolve : public SchurRedBlackBase<Field> { |  | ||||||
|   public: |  | ||||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; |  | ||||||
|  |  | ||||||
|     virtual bool adjoint(void) { return true; } |  | ||||||
|     SchurRedBlackDiagMooeeDagSolve(OperatorFunction<Field> &HermitianRBSolver, |  | ||||||
| 				   const bool initSubGuess = false, |  | ||||||
| 				   const bool _solnAsInitGuess = false)   |  | ||||||
|       : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess) {}; |  | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////////// |  | ||||||
|     // Override RedBlack specialisation |  | ||||||
|     ////////////////////////////////////////////////////// |  | ||||||
|     virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) |  | ||||||
|     { |  | ||||||
|       GridBase *grid = _Matrix.RedBlackGrid(); |  | ||||||
|       GridBase *fgrid= _Matrix.Grid(); |  | ||||||
|  |  | ||||||
|       Field   tmp(grid); |  | ||||||
|       Field  Mtmp(grid); |  | ||||||
|  |  | ||||||
|       pickCheckerboard(Even,src_e,src); |  | ||||||
|       pickCheckerboard(Odd ,src_o,src); |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       // src_o = (source_o - Moe^dag MeeInvDag source_e) |  | ||||||
|       ///////////////////////////////////////////////////// |  | ||||||
|       _Matrix.MooeeInvDag(src_e,tmp);  assert(  tmp.Checkerboard() ==Even); |  | ||||||
|       _Matrix.MeooeDag   (tmp,Mtmp);   assert( Mtmp.Checkerboard() ==Odd);      |  | ||||||
|       tmp=src_o-Mtmp;                  assert(  tmp.Checkerboard() ==Odd);      |  | ||||||
|  |  | ||||||
|       // get the right Mpc |  | ||||||
|       SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix); |  | ||||||
|       _HermOpEO.Mpc(tmp,src_o);     assert(src_o.Checkerboard() ==Odd); |  | ||||||
|     } |  | ||||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o) |  | ||||||
|     { |  | ||||||
|       SchurDiagMooeeDagOperator<Matrix,Field> _HermOpEO(_Matrix); |  | ||||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  |  | ||||||
|     }; |  | ||||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o) |  | ||||||
|     { |  | ||||||
|       SchurDiagMooeeDagOperator<Matrix,Field> _HermOpEO(_Matrix); |  | ||||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); |  | ||||||
|     } |  | ||||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) |  | ||||||
|     { |  | ||||||
|       GridBase *grid = _Matrix.RedBlackGrid(); |  | ||||||
|       GridBase *fgrid= _Matrix.Grid(); |  | ||||||
|  |  | ||||||
|       Field  sol_e(grid); |  | ||||||
|       Field  tmp(grid); |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       // sol_e = M_ee^-dag * ( src_e - Moe^dag phi_o )... |  | ||||||
|       // sol_o = phi_o |  | ||||||
|       /////////////////////////////////////////////////// |  | ||||||
|       _Matrix.MeooeDag(sol_o,tmp);      assert(tmp.Checkerboard()==Even); |  | ||||||
|       tmp = src_e-tmp;                  assert(tmp.Checkerboard()==Even); |  | ||||||
|       _Matrix.MooeeInvDag(tmp,sol_e);   assert(sol_e.Checkerboard()==Even); |  | ||||||
|        |  | ||||||
|       setCheckerboard(sol,sol_e); assert(  sol_e.Checkerboard() ==Even); |  | ||||||
|       setCheckerboard(sol,sol_o); assert(  sol_o.Checkerboard() ==Odd ); |  | ||||||
|     } |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -9,30 +9,14 @@ NAMESPACE_BEGIN(Grid); | |||||||
| #define AccSmall (3) | #define AccSmall (3) | ||||||
| #define Shared   (4) | #define Shared   (4) | ||||||
| #define SharedSmall (5) | #define SharedSmall (5) | ||||||
| #undef GRID_MM_VERBOSE  |  | ||||||
| uint64_t total_shared; | uint64_t total_shared; | ||||||
| uint64_t total_device; | uint64_t total_device; | ||||||
| uint64_t total_host;; | uint64_t total_host;; | ||||||
| void MemoryManager::PrintBytes(void) | void MemoryManager::PrintBytes(void) | ||||||
| { | { | ||||||
|   std::cout << " MemoryManager : ------------------------------------ "<<std::endl; |   std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl; | ||||||
|   std::cout << " MemoryManager : PrintBytes "<<std::endl; |   std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl; | ||||||
|   std::cout << " MemoryManager : ------------------------------------ "<<std::endl; |   std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl; | ||||||
|   std::cout << " MemoryManager : "<<(total_shared>>20)<<" shared      Mbytes "<<std::endl; |  | ||||||
|   std::cout << " MemoryManager : "<<(total_device>>20)<<" accelerator Mbytes "<<std::endl; |  | ||||||
|   std::cout << " MemoryManager : "<<(total_host>>20)  <<" cpu         Mbytes "<<std::endl; |  | ||||||
|   uint64_t cacheBytes; |  | ||||||
|   cacheBytes = CacheBytes[Cpu]; |  | ||||||
|   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" cpu cache Mbytes "<<std::endl; |  | ||||||
|   cacheBytes = CacheBytes[Acc]; |  | ||||||
|   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" acc cache Mbytes "<<std::endl; |  | ||||||
|   cacheBytes = CacheBytes[Shared]; |  | ||||||
|   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" shared cache Mbytes "<<std::endl; |  | ||||||
|    |  | ||||||
| #ifdef GRID_CUDA |  | ||||||
|   cuda_mem(); |  | ||||||
| #endif |  | ||||||
|    |  | ||||||
| } | } | ||||||
|  |  | ||||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||||
| @@ -40,114 +24,86 @@ void MemoryManager::PrintBytes(void) | |||||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||||
| MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; | MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; | ||||||
| int MemoryManager::Victim[MemoryManager::NallocType]; | int MemoryManager::Victim[MemoryManager::NallocType]; | ||||||
| int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 }; | int MemoryManager::Ncache[MemoryManager::NallocType] = { 8, 32, 8, 32, 8, 32 }; | ||||||
| uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType]; |  | ||||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||||
| // Actual allocation and deallocation utils | // Actual allocation and deallocation utils | ||||||
| ////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////// | ||||||
| void *MemoryManager::AcceleratorAllocate(size_t bytes) | void *MemoryManager::AcceleratorAllocate(size_t bytes) | ||||||
| { | { | ||||||
|   total_device+=bytes; |  | ||||||
|   void *ptr = (void *) Lookup(bytes,Acc); |   void *ptr = (void *) Lookup(bytes,Acc); | ||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocDevice(bytes); |     ptr = (void *) acceleratorAllocDevice(bytes); | ||||||
|  |     total_device+=bytes; | ||||||
|   } |   } | ||||||
| #ifdef GRID_MM_VERBOSE |  | ||||||
|   std::cout <<"AcceleratorAllocate "<<std::endl; |  | ||||||
|   PrintBytes(); |  | ||||||
| #endif |  | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| void  MemoryManager::AcceleratorFree    (void *ptr,size_t bytes) | void  MemoryManager::AcceleratorFree    (void *ptr,size_t bytes) | ||||||
| { | { | ||||||
|   total_device-=bytes; |  | ||||||
|   void *__freeme = Insert(ptr,bytes,Acc); |   void *__freeme = Insert(ptr,bytes,Acc); | ||||||
|   if ( __freeme ) { |   if ( __freeme ) { | ||||||
|     acceleratorFreeDevice(__freeme); |     acceleratorFreeDevice(__freeme); | ||||||
|  |     total_device-=bytes; | ||||||
|  |     //    PrintBytes(); | ||||||
|   } |   } | ||||||
| #ifdef GRID_MM_VERBOSE |  | ||||||
|   std::cout <<"AcceleratorFree "<<std::endl; |  | ||||||
|   PrintBytes(); |  | ||||||
| #endif |  | ||||||
| } | } | ||||||
| void *MemoryManager::SharedAllocate(size_t bytes) | void *MemoryManager::SharedAllocate(size_t bytes) | ||||||
| { | { | ||||||
|   total_shared+=bytes; |  | ||||||
|   void *ptr = (void *) Lookup(bytes,Shared); |   void *ptr = (void *) Lookup(bytes,Shared); | ||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocShared(bytes); |     ptr = (void *) acceleratorAllocShared(bytes); | ||||||
|  |     total_shared+=bytes; | ||||||
|  |     //    std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl; | ||||||
|  |     //    PrintBytes(); | ||||||
|   } |   } | ||||||
| #ifdef GRID_MM_VERBOSE |  | ||||||
|   std::cout <<"SharedAllocate "<<std::endl; |  | ||||||
|   PrintBytes(); |  | ||||||
| #endif |  | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| void  MemoryManager::SharedFree    (void *ptr,size_t bytes) | void  MemoryManager::SharedFree    (void *ptr,size_t bytes) | ||||||
| { | { | ||||||
|   total_shared-=bytes; |  | ||||||
|   void *__freeme = Insert(ptr,bytes,Shared); |   void *__freeme = Insert(ptr,bytes,Shared); | ||||||
|   if ( __freeme ) { |   if ( __freeme ) { | ||||||
|     acceleratorFreeShared(__freeme); |     acceleratorFreeShared(__freeme); | ||||||
|  |     total_shared-=bytes; | ||||||
|  |     //    PrintBytes(); | ||||||
|   } |   } | ||||||
| #ifdef GRID_MM_VERBOSE |  | ||||||
|   std::cout <<"SharedFree "<<std::endl; |  | ||||||
|   PrintBytes(); |  | ||||||
| #endif |  | ||||||
| } | } | ||||||
| #ifdef GRID_UVM | #ifdef GRID_UVM | ||||||
| void *MemoryManager::CpuAllocate(size_t bytes) | void *MemoryManager::CpuAllocate(size_t bytes) | ||||||
| { | { | ||||||
|   total_host+=bytes; |  | ||||||
|   void *ptr = (void *) Lookup(bytes,Cpu); |   void *ptr = (void *) Lookup(bytes,Cpu); | ||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocShared(bytes); |     ptr = (void *) acceleratorAllocShared(bytes); | ||||||
|  |     total_host+=bytes; | ||||||
|   } |   } | ||||||
| #ifdef GRID_MM_VERBOSE |  | ||||||
|   std::cout <<"CpuAllocate "<<std::endl; |  | ||||||
|   PrintBytes(); |  | ||||||
| #endif |  | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | ||||||
| { | { | ||||||
|   total_host-=bytes; |  | ||||||
|   NotifyDeletion(_ptr); |   NotifyDeletion(_ptr); | ||||||
|   void *__freeme = Insert(_ptr,bytes,Cpu); |   void *__freeme = Insert(_ptr,bytes,Cpu); | ||||||
|   if ( __freeme ) {  |   if ( __freeme ) {  | ||||||
|     acceleratorFreeShared(__freeme); |     acceleratorFreeShared(__freeme); | ||||||
|  |     total_host-=bytes; | ||||||
|   } |   } | ||||||
| #ifdef GRID_MM_VERBOSE |  | ||||||
|   std::cout <<"CpuFree "<<std::endl; |  | ||||||
|   PrintBytes(); |  | ||||||
| #endif |  | ||||||
| } | } | ||||||
| #else | #else | ||||||
| void *MemoryManager::CpuAllocate(size_t bytes) | void *MemoryManager::CpuAllocate(size_t bytes) | ||||||
| { | { | ||||||
|   total_host+=bytes; |  | ||||||
|   void *ptr = (void *) Lookup(bytes,Cpu); |   void *ptr = (void *) Lookup(bytes,Cpu); | ||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocCpu(bytes); |     ptr = (void *) acceleratorAllocCpu(bytes); | ||||||
|  |     total_host+=bytes; | ||||||
|   } |   } | ||||||
| #ifdef GRID_MM_VERBOSE |  | ||||||
|   std::cout <<"CpuAllocate "<<std::endl; |  | ||||||
|   PrintBytes(); |  | ||||||
| #endif |  | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | ||||||
| { | { | ||||||
|   total_host-=bytes; |  | ||||||
|   NotifyDeletion(_ptr); |   NotifyDeletion(_ptr); | ||||||
|   void *__freeme = Insert(_ptr,bytes,Cpu); |   void *__freeme = Insert(_ptr,bytes,Cpu); | ||||||
|   if ( __freeme ) {  |   if ( __freeme ) {  | ||||||
|     acceleratorFreeCpu(__freeme); |     acceleratorFreeCpu(__freeme); | ||||||
|  |     total_host-=bytes; | ||||||
|   } |   } | ||||||
| #ifdef GRID_MM_VERBOSE |  | ||||||
|   std::cout <<"CpuFree "<<std::endl; |  | ||||||
|   PrintBytes(); |  | ||||||
| #endif |  | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| @@ -225,13 +181,13 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,int type) | |||||||
| #ifdef ALLOCATION_CACHE | #ifdef ALLOCATION_CACHE | ||||||
|   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); |   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); | ||||||
|   int cache = type + small; |   int cache = type + small; | ||||||
|   return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache],CacheBytes[cache]);   |   return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache]);   | ||||||
| #else | #else | ||||||
|   return ptr; |   return ptr; | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim, uint64_t &cacheBytes)  | void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim)  | ||||||
| { | { | ||||||
|   assert(ncache>0); |   assert(ncache>0); | ||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
| @@ -255,7 +211,6 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | |||||||
|  |  | ||||||
|   if ( entries[v].valid ) { |   if ( entries[v].valid ) { | ||||||
|     ret = entries[v].address; |     ret = entries[v].address; | ||||||
|     cacheBytes -= entries[v].bytes; |  | ||||||
|     entries[v].valid = 0; |     entries[v].valid = 0; | ||||||
|     entries[v].address = NULL; |     entries[v].address = NULL; | ||||||
|     entries[v].bytes = 0; |     entries[v].bytes = 0; | ||||||
| @@ -264,7 +219,6 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | |||||||
|   entries[v].address=ptr; |   entries[v].address=ptr; | ||||||
|   entries[v].bytes  =bytes; |   entries[v].bytes  =bytes; | ||||||
|   entries[v].valid  =1; |   entries[v].valid  =1; | ||||||
|   cacheBytes += bytes; |  | ||||||
|  |  | ||||||
|   return ret; |   return ret; | ||||||
| } | } | ||||||
| @@ -274,13 +228,13 @@ void *MemoryManager::Lookup(size_t bytes,int type) | |||||||
| #ifdef ALLOCATION_CACHE | #ifdef ALLOCATION_CACHE | ||||||
|   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); |   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); | ||||||
|   int cache = type+small; |   int cache = type+small; | ||||||
|   return Lookup(bytes,Entries[cache],Ncache[cache],CacheBytes[cache]); |   return Lookup(bytes,Entries[cache],Ncache[cache]); | ||||||
| #else | #else | ||||||
|   return NULL; |   return NULL; | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t & cacheBytes)  | void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache)  | ||||||
| { | { | ||||||
|   assert(ncache>0); |   assert(ncache>0); | ||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
| @@ -289,7 +243,6 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach | |||||||
|   for(int e=0;e<ncache;e++){ |   for(int e=0;e<ncache;e++){ | ||||||
|     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { |     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { | ||||||
|       entries[e].valid = 0; |       entries[e].valid = 0; | ||||||
|       cacheBytes -= entries[e].bytes; |  | ||||||
|       return entries[e].address; |       return entries[e].address; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -82,15 +82,14 @@ private: | |||||||
|   static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; |   static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; | ||||||
|   static int Victim[NallocType]; |   static int Victim[NallocType]; | ||||||
|   static int Ncache[NallocType]; |   static int Ncache[NallocType]; | ||||||
|   static uint64_t CacheBytes[NallocType]; |  | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////// |   ///////////////////////////////////////////////// | ||||||
|   // Free pool |   // Free pool | ||||||
|   ///////////////////////////////////////////////// |   ///////////////////////////////////////////////// | ||||||
|   static void *Insert(void *ptr,size_t bytes,int type) ; |   static void *Insert(void *ptr,size_t bytes,int type) ; | ||||||
|   static void *Lookup(size_t bytes,int type) ; |   static void *Lookup(size_t bytes,int type) ; | ||||||
|   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim,uint64_t &cbytes) ; |   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; | ||||||
|   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t &cbytes) ; |   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; | ||||||
|  |  | ||||||
|   static void PrintBytes(void); |   static void PrintBytes(void); | ||||||
|  public: |  public: | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ | |||||||
|  |  | ||||||
| #warning "Using explicit device memory copies" | #warning "Using explicit device memory copies" | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
| //#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout); | //define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout); | ||||||
| #define dprintf(...) | #define dprintf(...) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -429,7 +429,6 @@ void  MemoryManager::NotifyDeletion(void *_ptr) | |||||||
| } | } | ||||||
| void  MemoryManager::Print(void) | void  MemoryManager::Print(void) | ||||||
| { | { | ||||||
|   PrintBytes(); |  | ||||||
|   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; |   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; | ||||||
|   std::cout << GridLogDebug << "Memory Manager                             " << std::endl; |   std::cout << GridLogDebug << "Memory Manager                             " << std::endl; | ||||||
|   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; |   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; | ||||||
|   | |||||||
| @@ -33,8 +33,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| bool Stencil_force_mpi = true; |  | ||||||
|  |  | ||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
| // Info that is setup once and indept of cartesian layout | // Info that is setup once and indept of cartesian layout | ||||||
| /////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -35,8 +35,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| extern bool Stencil_force_mpi ; |  | ||||||
|  |  | ||||||
| class CartesianCommunicator : public SharedMemory { | class CartesianCommunicator : public SharedMemory { | ||||||
|  |  | ||||||
| public:     | public:     | ||||||
|   | |||||||
| @@ -370,7 +370,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|   double off_node_bytes=0.0; |   double off_node_bytes=0.0; | ||||||
|   int tag; |   int tag; | ||||||
|  |  | ||||||
|   if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) { |   if ( gfrom ==MPI_UNDEFINED) { | ||||||
|     tag= dir+from*32; |     tag= dir+from*32; | ||||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); |     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
| @@ -378,18 +378,12 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | |||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) { |   if ( gdest == MPI_UNDEFINED ) { | ||||||
|     tag= dir+_processor*32; |     tag= dir+_processor*32; | ||||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); |     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); | ||||||
|     assert(ierr==0); |     assert(ierr==0); | ||||||
|     list.push_back(xrq); |     list.push_back(xrq); | ||||||
|     off_node_bytes+=bytes; |     off_node_bytes+=bytes; | ||||||
|   } else { |  | ||||||
|     // TODO : make a OMP loop on CPU, call threaded bcopy |  | ||||||
|     void *shm = (void *) this->ShmBufferTranslate(dest,recv); |  | ||||||
|     assert(shm!=NULL); |  | ||||||
|     acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); |  | ||||||
|     acceleratorCopySynchronise(); // MPI prob slower |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ( CommunicatorPolicy == CommunicatorPolicySequential ) { |   if ( CommunicatorPolicy == CommunicatorPolicySequential ) { | ||||||
|   | |||||||
| @@ -35,9 +35,6 @@ Author: Christoph Lehner <christoph@lhnr.de> | |||||||
| #endif | #endif | ||||||
| #ifdef GRID_HIP | #ifdef GRID_HIP | ||||||
| #include <hip/hip_runtime_api.h> | #include <hip/hip_runtime_api.h> | ||||||
| #endif |  | ||||||
| #ifdef GRID_SYCl |  | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid);  | NAMESPACE_BEGIN(Grid);  | ||||||
| @@ -73,7 +70,6 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm) | |||||||
|   WorldNodes = WorldSize/WorldShmSize; |   WorldNodes = WorldSize/WorldShmSize; | ||||||
|   assert( (WorldNodes * WorldShmSize) == WorldSize ); |   assert( (WorldNodes * WorldShmSize) == WorldSize ); | ||||||
|  |  | ||||||
|  |  | ||||||
|   // FIXME: Check all WorldShmSize are the same ? |   // FIXME: Check all WorldShmSize are the same ? | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////////////////// | ||||||
| @@ -450,47 +446,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
| //////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Hugetlbfs mapping intended | // Hugetlbfs mapping intended | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| #if defined(GRID_CUDA) ||defined(GRID_HIP)  || defined(GRID_SYCL) | #if defined(GRID_CUDA) ||defined(GRID_HIP) | ||||||
|  |  | ||||||
| //if defined(GRID_SYCL) |  | ||||||
| #if 0 |  | ||||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) |  | ||||||
| { |  | ||||||
|   void * ShmCommBuf ;  |  | ||||||
|   assert(_ShmSetup==1); |  | ||||||
|   assert(_ShmAlloc==0); |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // allocate the pointer array for shared windows for our group |  | ||||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   MPI_Barrier(WorldShmComm); |  | ||||||
|   WorldShmCommBufs.resize(WorldShmSize); |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Each MPI rank should allocate our own buffer |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |  | ||||||
|   ShmCommBuf = acceleratorAllocDevice(bytes); |  | ||||||
|  |  | ||||||
|   if (ShmCommBuf == (void *)NULL ) { |  | ||||||
|     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; |  | ||||||
|     exit(EXIT_FAILURE);   |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  |  | ||||||
| 	    << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; |  | ||||||
|  |  | ||||||
|   SharedMemoryZero(ShmCommBuf,bytes); |  | ||||||
|  |  | ||||||
|   assert(WorldShmSize == 1); |  | ||||||
|   for(int r=0;r<WorldShmSize;r++){ |  | ||||||
|     WorldShmCommBufs[r] = ShmCommBuf; |  | ||||||
|   } |  | ||||||
|   _ShmAllocBytes=bytes; |  | ||||||
|   _ShmAlloc=1; |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #if defined(GRID_CUDA) ||defined(GRID_HIP) ||defined(GRID_SYCL)   |  | ||||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||||
| { | { | ||||||
|   void * ShmCommBuf ;  |   void * ShmCommBuf ;  | ||||||
| @@ -514,16 +470,18 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|   // Each MPI rank should allocate our own buffer |   // Each MPI rank should allocate our own buffer | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   ShmCommBuf = acceleratorAllocDevice(bytes); |   ShmCommBuf = acceleratorAllocDevice(bytes); | ||||||
|  |  | ||||||
|   if (ShmCommBuf == (void *)NULL ) { |   if (ShmCommBuf == (void *)NULL ) { | ||||||
|     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; |     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; | ||||||
|     exit(EXIT_FAILURE);   |     exit(EXIT_FAILURE);   | ||||||
|   } |   } | ||||||
|   if ( WorldRank == 0 ){ |   //  if ( WorldRank == 0 ){ | ||||||
|  |   if ( 1 ){ | ||||||
|     std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  |     std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||||
| 	      << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | 	      << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | ||||||
|   } |   } | ||||||
|   SharedMemoryZero(ShmCommBuf,bytes); |   SharedMemoryZero(ShmCommBuf,bytes); | ||||||
|   std::cout<< "Setting up IPC"<<std::endl; |  | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Loop over ranks/gpu's on our node |   // Loop over ranks/gpu's on our node | ||||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -533,29 +491,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|     // If it is me, pass around the IPC access key |     // If it is me, pass around the IPC access key | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|     void * thisBuf = ShmCommBuf; |  | ||||||
|     if(!Stencil_force_mpi) { |  | ||||||
| #ifdef GRID_SYCL_LEVEL_ZERO_IPC |  | ||||||
|     typedef struct { int fd; pid_t pid ; } clone_mem_t; |  | ||||||
|  |  | ||||||
|     auto zeDevice    = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device()); |  | ||||||
|     auto zeContext   = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context()); |  | ||||||
|        |  | ||||||
|     ze_ipc_mem_handle_t ihandle; |  | ||||||
|     clone_mem_t handle; |  | ||||||
|  |  | ||||||
|     if ( r==WorldShmRank ) {  |  | ||||||
|       auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle); |  | ||||||
|       if ( err != ZE_RESULT_SUCCESS ) { |  | ||||||
| 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; |  | ||||||
| 	exit(EXIT_FAILURE); |  | ||||||
|       } else { |  | ||||||
| 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; |  | ||||||
|       } |  | ||||||
|       memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int)); |  | ||||||
|       handle.pid = getpid(); |  | ||||||
|     } |  | ||||||
| #endif |  | ||||||
| #ifdef GRID_CUDA | #ifdef GRID_CUDA | ||||||
|     cudaIpcMemHandle_t handle; |     cudaIpcMemHandle_t handle; | ||||||
|     if ( r==WorldShmRank ) {  |     if ( r==WorldShmRank ) {  | ||||||
| @@ -576,7 +511,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
|     // Share this IPC handle across the Shm Comm |     // Share this IPC handle across the Shm Comm | ||||||
|     ////////////////////////////////////////////////// |     ////////////////////////////////////////////////// | ||||||
| @@ -592,35 +526,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     // If I am not the source, overwrite thisBuf with remote buffer |     // If I am not the source, overwrite thisBuf with remote buffer | ||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|  |     void * thisBuf = ShmCommBuf; | ||||||
| #ifdef GRID_SYCL_LEVEL_ZERO_IPC |  | ||||||
|     if ( r!=WorldShmRank ) { |  | ||||||
|       thisBuf = nullptr; |  | ||||||
|       std::cout<<"mapping seeking remote pid/fd " |  | ||||||
| 	       <<handle.pid<<"/" |  | ||||||
| 	       <<handle.fd<<std::endl; |  | ||||||
|  |  | ||||||
|       int pidfd = syscall(SYS_pidfd_open,handle.pid,0); |  | ||||||
|       std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n"; |  | ||||||
|       //      int myfd  = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0); |  | ||||||
|       int myfd  = syscall(438,pidfd,handle.fd,0); |  | ||||||
|  |  | ||||||
|       std::cout<<"Using IpcHandle myfd "<<myfd<<"\n"; |  | ||||||
|        |  | ||||||
|       memcpy((void *)&ihandle,(void *)&myfd,sizeof(int)); |  | ||||||
|  |  | ||||||
|       auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf); |  | ||||||
|       if ( err != ZE_RESULT_SUCCESS ) { |  | ||||||
| 	std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl; |  | ||||||
| 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;  |  | ||||||
| 	exit(EXIT_FAILURE); |  | ||||||
|       } else { |  | ||||||
| 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl; |  | ||||||
| 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle pointer is "<<std::hex<<thisBuf<<std::dec<<std::endl; |  | ||||||
|       } |  | ||||||
|       assert(thisBuf!=nullptr); |  | ||||||
|     } |  | ||||||
| #endif |  | ||||||
| #ifdef GRID_CUDA | #ifdef GRID_CUDA | ||||||
|     if ( r!=WorldShmRank ) {  |     if ( r!=WorldShmRank ) {  | ||||||
|       auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); |       auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); | ||||||
| @@ -642,7 +548,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     // Save a copy of the device buffers |     // Save a copy of the device buffers | ||||||
|     /////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////// | ||||||
|     } |  | ||||||
|     WorldShmCommBufs[r] = thisBuf; |     WorldShmCommBufs[r] = thisBuf; | ||||||
| #else | #else | ||||||
|     WorldShmCommBufs[r] = ShmCommBuf; |     WorldShmCommBufs[r] = ShmCommBuf; | ||||||
| @@ -652,8 +557,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
|   _ShmAllocBytes=bytes; |   _ShmAllocBytes=bytes; | ||||||
|   _ShmAlloc=1; |   _ShmAlloc=1; | ||||||
| } | } | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #else  | #else  | ||||||
| #ifdef GRID_MPI3_SHMMMAP | #ifdef GRID_MPI3_SHMMMAP | ||||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||||
| @@ -824,16 +727,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | |||||||
| ///////////////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////////////// | ||||||
| void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes) | void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes) | ||||||
| { | { | ||||||
| #if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL) | #ifdef GRID_CUDA | ||||||
|   acceleratorMemSet(dest,0,bytes); |   cudaMemset(dest,0,bytes); | ||||||
| #else | #else | ||||||
|   bzero(dest,bytes); |   bzero(dest,bytes); | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
| void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) | void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) | ||||||
| { | { | ||||||
| #if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL) | #ifdef GRID_CUDA | ||||||
|   acceleratorCopyToDevice(src,dest,bytes); |   cudaMemcpy(dest,src,bytes,cudaMemcpyDefault); | ||||||
| #else    | #else    | ||||||
|   bcopy(src,dest,bytes); |   bcopy(src,dest,bytes); | ||||||
| #endif | #endif | ||||||
| @@ -897,7 +800,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) | |||||||
|   } |   } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   //SharedMemoryTest(); |   SharedMemoryTest(); | ||||||
| } | } | ||||||
| ////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////// | ||||||
| // On node barrier | // On node barrier | ||||||
|   | |||||||
| @@ -46,4 +46,3 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/lattice/Lattice_unary.h> | #include <Grid/lattice/Lattice_unary.h> | ||||||
| #include <Grid/lattice/Lattice_transfer.h> | #include <Grid/lattice/Lattice_transfer.h> | ||||||
| #include <Grid/lattice/Lattice_basis.h> | #include <Grid/lattice/Lattice_basis.h> | ||||||
| #include <Grid/lattice/Lattice_crc.h> |  | ||||||
|   | |||||||
| @@ -225,7 +225,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> & | |||||||
|   autoView( x_v , x, AcceleratorRead); |   autoView( x_v , x, AcceleratorRead); | ||||||
|   autoView( y_v , y, AcceleratorRead); |   autoView( y_v , y, AcceleratorRead); | ||||||
|   accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ |   accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ | ||||||
|     auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]); |     auto tmp = a*x_v(ss)+y_v(ss); | ||||||
|     coalescedWrite(ret_v[ss],tmp); |     coalescedWrite(ret_v[ss],tmp); | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -125,7 +125,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | |||||||
|  |  | ||||||
| 	for(int k=k0; k<k1; ++k){ | 	for(int k=k0; k<k1; ++k){ | ||||||
| 	  auto tmp = coalescedRead(Bp[ss*nrot+j]); | 	  auto tmp = coalescedRead(Bp[ss*nrot+j]); | ||||||
| 	  coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss])); | 	  coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss])); | ||||||
| 	} | 	} | ||||||
|       }); |       }); | ||||||
|  |  | ||||||
| @@ -134,7 +134,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | |||||||
| 	int jj  =j0+j; | 	int jj  =j0+j; | ||||||
| 	int ss =sj/nrot; | 	int ss =sj/nrot; | ||||||
| 	int sss=ss+s; | 	int sss=ss+s; | ||||||
| 	coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j])); | 	coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); | ||||||
|       }); |       }); | ||||||
|   } |   } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -1,55 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/lattice/Lattice_crc.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2021 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1) |  | ||||||
| { |  | ||||||
|   auto ff = localNorm2(f); |  | ||||||
|   if ( mu==-1 ) mu = f.Grid()->Nd()-1; |  | ||||||
|   typedef typename vobj::tensor_reduced normtype; |  | ||||||
|   typedef typename normtype::scalar_object scalar; |  | ||||||
|   std::vector<scalar> sff; |  | ||||||
|   sliceSum(ff,sff,mu); |  | ||||||
|   for(int t=0;t<sff.size();t++){ |  | ||||||
|     std::cout << s<<" "<<t<<" "<<sff[t]<<std::endl; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class vobj> uint32_t crc(Lattice<vobj> & buf) |  | ||||||
| { |  | ||||||
|   autoView( buf_v , buf, CpuRead); |  | ||||||
|   return ::crc32(0L,(unsigned char *)&buf_v[0],(size_t)sizeof(vobj)*buf.oSites()); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -361,7 +361,6 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|   // But easily avoided by using double precision fields |   // But easily avoided by using double precision fields | ||||||
|   /////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////// | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|   typedef typename vobj::scalar_object::scalar_type scalar_type; |  | ||||||
|   GridBase  *grid = Data.Grid(); |   GridBase  *grid = Data.Grid(); | ||||||
|   assert(grid!=NULL); |   assert(grid!=NULL); | ||||||
|  |  | ||||||
| @@ -420,19 +419,20 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   // sum over nodes. |   // sum over nodes. | ||||||
|  |   sobj gsum; | ||||||
|   for(int t=0;t<fd;t++){ |   for(int t=0;t<fd;t++){ | ||||||
|     int pt = t/ld; // processor plane |     int pt = t/ld; // processor plane | ||||||
|     int lt = t%ld; |     int lt = t%ld; | ||||||
|     if ( pt == grid->_processor_coor[orthogdim] ) { |     if ( pt == grid->_processor_coor[orthogdim] ) { | ||||||
|       result[t]=lsSum[lt]; |       gsum=lsSum[lt]; | ||||||
|     } else { |     } else { | ||||||
|       result[t]=Zero(); |       gsum=Zero(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     grid->GlobalSum(gsum); | ||||||
|  |  | ||||||
|  |     result[t]=gsum; | ||||||
|   } |   } | ||||||
|   scalar_type * ptr = (scalar_type *) &result[0]; |  | ||||||
|   int words = fd*sizeof(sobj)/sizeof(scalar_type); |  | ||||||
|   grid->GlobalSumVector(ptr, words); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
|   | |||||||
| @@ -32,9 +32,8 @@ | |||||||
| #include <random> | #include <random> | ||||||
|  |  | ||||||
| #ifdef RNG_SITMO | #ifdef RNG_SITMO | ||||||
| #include <Grid/random/sitmo_prng_engine.hpp> | #include <Grid/sitmo_rng/sitmo_prng_engine.hpp> | ||||||
| #endif  | #endif  | ||||||
| #include <Grid/random/gaussian.h> |  | ||||||
|  |  | ||||||
| #if defined(RNG_SITMO) | #if defined(RNG_SITMO) | ||||||
| #define RNG_FAST_DISCARD | #define RNG_FAST_DISCARD | ||||||
| @@ -143,7 +142,7 @@ public: | |||||||
|  |  | ||||||
|   std::vector<RngEngine>                             _generators; |   std::vector<RngEngine>                             _generators; | ||||||
|   std::vector<std::uniform_real_distribution<RealD> > _uniform; |   std::vector<std::uniform_real_distribution<RealD> > _uniform; | ||||||
|   std::vector<Grid::gaussian_distribution<RealD> >    _gaussian; |   std::vector<std::normal_distribution<RealD> >       _gaussian; | ||||||
|   std::vector<std::discrete_distribution<int32_t> >   _bernoulli; |   std::vector<std::discrete_distribution<int32_t> >   _bernoulli; | ||||||
|   std::vector<std::uniform_int_distribution<uint32_t> > _uid; |   std::vector<std::uniform_int_distribution<uint32_t> > _uid; | ||||||
|  |  | ||||||
| @@ -244,7 +243,7 @@ public: | |||||||
|   GridSerialRNG() : GridRNGbase() { |   GridSerialRNG() : GridRNGbase() { | ||||||
|     _generators.resize(1); |     _generators.resize(1); | ||||||
|     _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1}); |     _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1}); | ||||||
|     _gaussian.resize(1,gaussian_distribution<RealD>(0.0,1.0) ); |     _gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) ); | ||||||
|     _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1}); |     _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1}); | ||||||
|     _uid.resize(1,std::uniform_int_distribution<uint32_t>() ); |     _uid.resize(1,std::uniform_int_distribution<uint32_t>() ); | ||||||
|   } |   } | ||||||
| @@ -358,7 +357,7 @@ public: | |||||||
|  |  | ||||||
|     _generators.resize(_vol); |     _generators.resize(_vol); | ||||||
|     _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); |     _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); | ||||||
|     _gaussian.resize(_vol,gaussian_distribution<RealD>(0.0,1.0) ); |     _gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) ); | ||||||
|     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); |     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); | ||||||
|     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); |     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -364,21 +364,15 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
|   autoView( coarseData_ , coarseData, AcceleratorWrite); |   autoView( coarseData_ , coarseData, AcceleratorWrite); | ||||||
|   autoView( fineData_   , fineData, AcceleratorRead); |   autoView( fineData_   , fineData, AcceleratorRead); | ||||||
|  |  | ||||||
|   auto coarseData_p = &coarseData_[0]; |  | ||||||
|   auto fineData_p = &fineData_[0]; |  | ||||||
|    |  | ||||||
|   Coordinate fine_rdimensions = fine->_rdimensions; |   Coordinate fine_rdimensions = fine->_rdimensions; | ||||||
|   Coordinate coarse_rdimensions = coarse->_rdimensions; |   Coordinate coarse_rdimensions = coarse->_rdimensions; | ||||||
|    |    | ||||||
|   vobj zz = Zero(); |  | ||||||
|    |  | ||||||
|   accelerator_for(sc,coarse->oSites(),1,{ |   accelerator_for(sc,coarse->oSites(),1,{ | ||||||
|  |  | ||||||
|       // One thread per sub block |       // One thread per sub block | ||||||
|       Coordinate coor_c(_ndimension); |       Coordinate coor_c(_ndimension); | ||||||
|       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate |       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate | ||||||
|  |       coarseData_[sc]=Zero(); | ||||||
|       vobj cd = zz; |  | ||||||
|  |  | ||||||
|       for(int sb=0;sb<blockVol;sb++){ |       for(int sb=0;sb<blockVol;sb++){ | ||||||
|  |  | ||||||
| @@ -389,11 +383,9 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
| 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | ||||||
| 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | ||||||
|  |  | ||||||
| 	cd=cd+fineData_p[sf]; | 	coarseData_[sc]=coarseData_[sc]+fineData_[sf]; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       coarseData_p[sc] = cd; |  | ||||||
|  |  | ||||||
|     }); |     }); | ||||||
|   return; |   return; | ||||||
| } | } | ||||||
| @@ -785,7 +777,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int | |||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine) | void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||||
| { | { | ||||||
|   typedef typename vobj::scalar_object sobj; |   typedef typename vobj::scalar_object sobj; | ||||||
|  |  | ||||||
| @@ -1010,95 +1002,53 @@ vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out) | |||||||
|   }); |   }); | ||||||
| } | } | ||||||
|  |  | ||||||
| //The workspace for a precision change operation allowing for the reuse of the mapping to save time on subsequent calls |  | ||||||
| class precisionChangeWorkspace{ |  | ||||||
|   std::pair<Integer,Integer>* fmap_device; //device pointer |  | ||||||
| public: |  | ||||||
|   precisionChangeWorkspace(GridBase *out_grid, GridBase *in_grid){ |  | ||||||
|     //Build a map between the sites and lanes of the output field and the input field as we cannot use the Grids on the device |  | ||||||
|     assert(out_grid->Nd() == in_grid->Nd()); |  | ||||||
|     for(int d=0;d<out_grid->Nd();d++){ |  | ||||||
|       assert(out_grid->FullDimensions()[d] == in_grid->FullDimensions()[d]); |  | ||||||
|     } |  | ||||||
|     int Nsimd_out = out_grid->Nsimd(); |  | ||||||
|  |  | ||||||
|     std::vector<Coordinate> out_icorrs(out_grid->Nsimd()); //reuse these |  | ||||||
|     for(int lane=0; lane < out_grid->Nsimd(); lane++) |  | ||||||
|       out_grid->iCoorFromIindex(out_icorrs[lane], lane); |  | ||||||
|    |  | ||||||
|     std::vector<std::pair<Integer,Integer> > fmap_host(out_grid->lSites()); //lsites = osites*Nsimd |  | ||||||
|     thread_for(out_oidx,out_grid->oSites(),{ |  | ||||||
| 	Coordinate out_ocorr;  |  | ||||||
| 	out_grid->oCoorFromOindex(out_ocorr, out_oidx); |  | ||||||
|        |  | ||||||
| 	Coordinate lcorr; //the local coordinate (common to both in and out as full coordinate) |  | ||||||
| 	for(int out_lane=0; out_lane < Nsimd_out; out_lane++){ |  | ||||||
| 	  out_grid->InOutCoorToLocalCoor(out_ocorr, out_icorrs[out_lane], lcorr); |  | ||||||
| 	 |  | ||||||
| 	  //int in_oidx = in_grid->oIndex(lcorr), in_lane = in_grid->iIndex(lcorr); |  | ||||||
| 	  //Note oIndex and OcorrFromOindex (and same for iIndex) are not inverse for checkerboarded lattice, the former coordinates being defined on the full lattice and the latter on the reduced lattice |  | ||||||
| 	  //Until this is fixed we need to circumvent the problem locally. Here I will use the coordinates defined on the reduced lattice for simplicity |  | ||||||
| 	  int in_oidx = 0, in_lane = 0; |  | ||||||
| 	  for(int d=0;d<in_grid->_ndimension;d++){ |  | ||||||
| 	    in_oidx += in_grid->_ostride[d] * ( lcorr[d] % in_grid->_rdimensions[d] ); |  | ||||||
| 	    in_lane += in_grid->_istride[d] * ( lcorr[d] / in_grid->_rdimensions[d] ); |  | ||||||
| 	  } |  | ||||||
| 	  fmap_host[out_lane + Nsimd_out*out_oidx] = std::pair<Integer,Integer>( in_oidx, in_lane ); |  | ||||||
| 	} |  | ||||||
|       }); |  | ||||||
|  |  | ||||||
|     //Copy the map to the device (if we had a way to tell if an accelerator is in use we could avoid this copy for CPU-only machines) |  | ||||||
|     size_t fmap_bytes = out_grid->lSites() * sizeof(std::pair<Integer,Integer>); |  | ||||||
|     fmap_device = (std::pair<Integer,Integer>*)acceleratorAllocDevice(fmap_bytes); |  | ||||||
|     acceleratorCopyToDevice(fmap_host.data(), fmap_device, fmap_bytes);  |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   //Prevent moving or copying |  | ||||||
|   precisionChangeWorkspace(const precisionChangeWorkspace &r) = delete; |  | ||||||
|   precisionChangeWorkspace(precisionChangeWorkspace &&r) = delete; |  | ||||||
|   precisionChangeWorkspace &operator=(const precisionChangeWorkspace &r) = delete; |  | ||||||
|   precisionChangeWorkspace &operator=(precisionChangeWorkspace &&r) = delete; |  | ||||||
|    |  | ||||||
|   std::pair<Integer,Integer> const* getMap() const{ return fmap_device; } |  | ||||||
|  |  | ||||||
|   ~precisionChangeWorkspace(){ |  | ||||||
|     acceleratorFreeDevice(fmap_device); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| //Convert a lattice of one precision to another. The input workspace contains the mapping data. |  | ||||||
| template<class VobjOut, class VobjIn> |  | ||||||
| void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in, const precisionChangeWorkspace &workspace){ |  | ||||||
|   static_assert( std::is_same<typename VobjOut::DoublePrecision, typename VobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same |  | ||||||
|  |  | ||||||
|   out.Checkerboard() = in.Checkerboard(); |  | ||||||
|   constexpr int Nsimd_out = VobjOut::Nsimd(); |  | ||||||
|  |  | ||||||
|   std::pair<Integer,Integer> const* fmap_device = workspace.getMap(); |  | ||||||
|  |  | ||||||
|   //Do the copy/precision change |  | ||||||
|   autoView( out_v , out, AcceleratorWrite); |  | ||||||
|   autoView( in_v , in, AcceleratorRead); |  | ||||||
|  |  | ||||||
|   accelerator_for(out_oidx, out.Grid()->oSites(), 1,{ |  | ||||||
|       std::pair<Integer,Integer> const* fmap_osite = fmap_device + out_oidx*Nsimd_out; |  | ||||||
|       for(int out_lane=0; out_lane < Nsimd_out; out_lane++){       |  | ||||||
| 	int in_oidx = fmap_osite[out_lane].first; |  | ||||||
| 	int in_lane = fmap_osite[out_lane].second; |  | ||||||
| 	copyLane(out_v[out_oidx], out_lane, in_v[in_oidx], in_lane); |  | ||||||
|       } |  | ||||||
|     }); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| //Convert a Lattice from one precision to another | //Convert a Lattice from one precision to another | ||||||
| //Generate the workspace in place; if multiple calls with the same mapping are performed, consider pregenerating the workspace and reusing |  | ||||||
| template<class VobjOut, class VobjIn> | template<class VobjOut, class VobjIn> | ||||||
| void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in) | ||||||
|   precisionChangeWorkspace workspace(out.Grid(), in.Grid()); | { | ||||||
|   precisionChange(out, in, workspace); |   assert(out.Grid()->Nd() == in.Grid()->Nd()); | ||||||
| } |   for(int d=0;d<out.Grid()->Nd();d++){ | ||||||
|  |     assert(out.Grid()->FullDimensions()[d] == in.Grid()->FullDimensions()[d]); | ||||||
|  |   } | ||||||
|  |   out.Checkerboard() = in.Checkerboard(); | ||||||
|  |   GridBase *in_grid=in.Grid(); | ||||||
|  |   GridBase *out_grid = out.Grid(); | ||||||
|  |  | ||||||
|  |   typedef typename VobjOut::scalar_object SobjOut; | ||||||
|  |   typedef typename VobjIn::scalar_object SobjIn; | ||||||
|  |  | ||||||
|  |   int ndim = out.Grid()->Nd(); | ||||||
|  |   int out_nsimd = out_grid->Nsimd(); | ||||||
|  |      | ||||||
|  |   std::vector<Coordinate > out_icoor(out_nsimd); | ||||||
|  |        | ||||||
|  |   for(int lane=0; lane < out_nsimd; lane++){ | ||||||
|  |     out_icoor[lane].resize(ndim); | ||||||
|  |     out_grid->iCoorFromIindex(out_icoor[lane], lane); | ||||||
|  |   } | ||||||
|  |          | ||||||
|  |   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); | ||||||
|  |   unvectorizeToLexOrdArray(in_slex_conv, in); | ||||||
|  |      | ||||||
|  |   autoView( out_v , out, CpuWrite); | ||||||
|  |   thread_for(out_oidx,out_grid->oSites(),{ | ||||||
|  |     Coordinate out_ocoor(ndim); | ||||||
|  |     out_grid->oCoorFromOindex(out_ocoor, out_oidx); | ||||||
|  |  | ||||||
|  |     ExtractPointerArray<SobjOut> ptrs(out_nsimd);       | ||||||
|  |  | ||||||
|  |     Coordinate lcoor(out_grid->Nd()); | ||||||
|  |        | ||||||
|  |     for(int lane=0; lane < out_nsimd; lane++){ | ||||||
|  |       for(int mu=0;mu<ndim;mu++) | ||||||
|  | 	lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu]; | ||||||
|  | 	 | ||||||
|  |       int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions); | ||||||
|  |       ptrs[lane] = &in_slex_conv[llex]; | ||||||
|  |     } | ||||||
|  |     merge(out_v[out_oidx], ptrs, 0); | ||||||
|  |   }); | ||||||
|  | } | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||||
| // Communicate between grids | // Communicate between grids | ||||||
|   | |||||||
| @@ -69,7 +69,6 @@ GridLogger GridLogDebug  (1, "Debug", GridLogColours, "PURPLE"); | |||||||
| GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); | GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); | ||||||
| GridLogger GridLogIterative  (1, "Iterative", GridLogColours, "BLUE"); | GridLogger GridLogIterative  (1, "Iterative", GridLogColours, "BLUE"); | ||||||
| GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); | GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); | ||||||
| GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE"); |  | ||||||
|  |  | ||||||
| void GridLogConfigure(std::vector<std::string> &logstreams) { | void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||||
|   GridLogError.Active(0); |   GridLogError.Active(0); | ||||||
| @@ -80,7 +79,6 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | |||||||
|   GridLogPerformance.Active(0); |   GridLogPerformance.Active(0); | ||||||
|   GridLogIntegrator.Active(1); |   GridLogIntegrator.Active(1); | ||||||
|   GridLogColours.Active(0); |   GridLogColours.Active(0); | ||||||
|   GridLogHMC.Active(1); |  | ||||||
|  |  | ||||||
|   for (int i = 0; i < logstreams.size(); i++) { |   for (int i = 0; i < logstreams.size(); i++) { | ||||||
|     if (logstreams[i] == std::string("Error"))       GridLogError.Active(1); |     if (logstreams[i] == std::string("Error"))       GridLogError.Active(1); | ||||||
| @@ -89,8 +87,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | |||||||
|     if (logstreams[i] == std::string("Iterative"))   GridLogIterative.Active(1); |     if (logstreams[i] == std::string("Iterative"))   GridLogIterative.Active(1); | ||||||
|     if (logstreams[i] == std::string("Debug"))       GridLogDebug.Active(1); |     if (logstreams[i] == std::string("Debug"))       GridLogDebug.Active(1); | ||||||
|     if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1); |     if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1); | ||||||
|     if (logstreams[i] == std::string("NoIntegrator"))  GridLogIntegrator.Active(0); |     if (logstreams[i] == std::string("Integrator"))  GridLogIntegrator.Active(1); | ||||||
|     if (logstreams[i] == std::string("NoHMC"))         GridLogHMC.Active(0); |  | ||||||
|     if (logstreams[i] == std::string("Colours"))     GridLogColours.Active(1); |     if (logstreams[i] == std::string("Colours"))     GridLogColours.Active(1); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -182,7 +182,6 @@ extern GridLogger GridLogDebug  ; | |||||||
| extern GridLogger GridLogPerformance; | extern GridLogger GridLogPerformance; | ||||||
| extern GridLogger GridLogIterative  ; | extern GridLogger GridLogIterative  ; | ||||||
| extern GridLogger GridLogIntegrator  ; | extern GridLogger GridLogIntegrator  ; | ||||||
| extern GridLogger GridLogHMC; |  | ||||||
| extern Colours    GridLogColours; | extern Colours    GridLogColours; | ||||||
|  |  | ||||||
| std::string demangle(const char* name) ; | std::string demangle(const char* name) ; | ||||||
|   | |||||||
| @@ -39,10 +39,8 @@ using namespace Grid; | |||||||
| //////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||||
| class NerscIO : public BinaryIO {  | class NerscIO : public BinaryIO {  | ||||||
| public: | public: | ||||||
|   typedef Lattice<vLorentzColourMatrixD> GaugeField; |  | ||||||
|  |  | ||||||
|   // Enable/disable exiting if the plaquette in the header does not match the value computed (default true) |   typedef Lattice<vLorentzColourMatrixD> GaugeField; | ||||||
|   static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; } |  | ||||||
|  |  | ||||||
|   static inline void truncate(std::string file){ |   static inline void truncate(std::string file){ | ||||||
|     std::ofstream fout(file,std::ios::out); |     std::ofstream fout(file,std::ios::out); | ||||||
| @@ -200,7 +198,7 @@ public: | |||||||
|       std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl; |       std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl; | ||||||
|       exit(0); |       exit(0); | ||||||
|     } |     } | ||||||
|     if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); |     assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); | ||||||
|     assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); |     assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); | ||||||
|     assert(nersc_csum == header.checksum ); |     assert(nersc_csum == header.checksum ); | ||||||
|        |        | ||||||
|   | |||||||
| @@ -63,7 +63,6 @@ static constexpr int Ngp=2; // gparity index range | |||||||
| #define ColourIndex  (2) | #define ColourIndex  (2) | ||||||
| #define SpinIndex    (1) | #define SpinIndex    (1) | ||||||
| #define LorentzIndex (0) | #define LorentzIndex (0) | ||||||
| #define GparityFlavourIndex (0) |  | ||||||
|  |  | ||||||
| // Also should make these a named enum type | // Also should make these a named enum type | ||||||
| static constexpr int DaggerNo=0; | static constexpr int DaggerNo=0; | ||||||
| @@ -88,8 +87,6 @@ template<typename T> struct isCoarsened { | |||||||
| template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ; | template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ; | ||||||
| template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ; | template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ; | ||||||
|  |  | ||||||
| const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom! |  | ||||||
|  |  | ||||||
| // ChrisK very keen to add extra space for Gparity doubling. | // ChrisK very keen to add extra space for Gparity doubling. | ||||||
| // | // | ||||||
| // Also add domain wall index, in a way where Wilson operator  | // Also add domain wall index, in a way where Wilson operator  | ||||||
| @@ -104,7 +101,6 @@ template<typename vtype> using iSpinMatrix                = iScalar<iMatrix<iSca | |||||||
| template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ; | template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ; | ||||||
| template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >; | template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >; | ||||||
| template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ; | template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ; | ||||||
| template<typename vtype> using iLorentzVector             = iVector<iScalar<iScalar<vtype> >, Nd > ; |  | ||||||
| template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ; | template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ; | ||||||
| template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >; | template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >; | ||||||
| template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >; | template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >; | ||||||
| @@ -114,10 +110,8 @@ template<typename vtype> using iHalfSpinColourVector      = iScalar<iVector<iVec | |||||||
|     template<typename vtype> using iSpinColourSpinColourMatrix  = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >; |     template<typename vtype> using iSpinColourSpinColourMatrix  = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >; | ||||||
|  |  | ||||||
|  |  | ||||||
| template<typename vtype> using iGparityFlavourVector                = iVector<iScalar<iScalar<vtype> >, Ngp>; |  | ||||||
| template<typename vtype> using iGparitySpinColourVector       = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >; | template<typename vtype> using iGparitySpinColourVector       = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >; | ||||||
| template<typename vtype> using iGparityHalfSpinColourVector   = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >; | template<typename vtype> using iGparityHalfSpinColourVector   = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >; | ||||||
| template<typename vtype> using iGparityFlavourMatrix = iMatrix<iScalar<iScalar<vtype> >, Ngp>; |  | ||||||
|  |  | ||||||
| // Spin matrix | // Spin matrix | ||||||
| typedef iSpinMatrix<Complex  >          SpinMatrix; | typedef iSpinMatrix<Complex  >          SpinMatrix; | ||||||
| @@ -164,16 +158,7 @@ typedef iSpinColourSpinColourMatrix<vComplex >    vSpinColourSpinColourMatrix; | |||||||
| typedef iSpinColourSpinColourMatrix<vComplexF>    vSpinColourSpinColourMatrixF; | typedef iSpinColourSpinColourMatrix<vComplexF>    vSpinColourSpinColourMatrixF; | ||||||
| typedef iSpinColourSpinColourMatrix<vComplexD>    vSpinColourSpinColourMatrixD; | typedef iSpinColourSpinColourMatrix<vComplexD>    vSpinColourSpinColourMatrixD; | ||||||
|  |  | ||||||
| // LorentzVector | // LorentzColour | ||||||
| typedef iLorentzVector<Complex  > LorentzVector; |  | ||||||
| typedef iLorentzVector<ComplexF > LorentzVectorF; |  | ||||||
| typedef iLorentzVector<ComplexD > LorentzVectorD; |  | ||||||
|  |  | ||||||
| typedef iLorentzVector<vComplex > vLorentzVector; |  | ||||||
| typedef iLorentzVector<vComplexF> vLorentzVectorF; |  | ||||||
| typedef iLorentzVector<vComplexD> vLorentzVectorD; |  | ||||||
|  |  | ||||||
| // LorentzColourMatrix |  | ||||||
| typedef iLorentzColourMatrix<Complex  > LorentzColourMatrix; | typedef iLorentzColourMatrix<Complex  > LorentzColourMatrix; | ||||||
| typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF; | typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF; | ||||||
| typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD; | typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD; | ||||||
| @@ -191,16 +176,6 @@ typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix; | |||||||
| typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF; | typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF; | ||||||
| typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD; | typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD; | ||||||
|  |  | ||||||
| //G-parity flavour matrix |  | ||||||
| typedef iGparityFlavourMatrix<Complex> GparityFlavourMatrix; |  | ||||||
| typedef iGparityFlavourMatrix<ComplexF> GparityFlavourMatrixF; |  | ||||||
| typedef iGparityFlavourMatrix<ComplexD> GparityFlavourMatrixD; |  | ||||||
|  |  | ||||||
| typedef iGparityFlavourMatrix<vComplex> vGparityFlavourMatrix; |  | ||||||
| typedef iGparityFlavourMatrix<vComplexF> vGparityFlavourMatrixF; |  | ||||||
| typedef iGparityFlavourMatrix<vComplexD> vGparityFlavourMatrixD; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| // Spin vector | // Spin vector | ||||||
| typedef iSpinVector<Complex >           SpinVector; | typedef iSpinVector<Complex >           SpinVector; | ||||||
| typedef iSpinVector<ComplexF>           SpinVectorF; | typedef iSpinVector<ComplexF>           SpinVectorF; | ||||||
| @@ -246,16 +221,6 @@ typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector; | |||||||
| typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF; | typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF; | ||||||
| typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD; | typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD; | ||||||
|      |      | ||||||
| //G-parity flavour vector |  | ||||||
| typedef iGparityFlavourVector<Complex >         GparityFlavourVector; |  | ||||||
| typedef iGparityFlavourVector<ComplexF>         GparityFlavourVectorF; |  | ||||||
| typedef iGparityFlavourVector<ComplexD>         GparityFlavourVectorD; |  | ||||||
|  |  | ||||||
| typedef iGparityFlavourVector<vComplex >         vGparityFlavourVector; |  | ||||||
| typedef iGparityFlavourVector<vComplexF>         vGparityFlavourVectorF; |  | ||||||
| typedef iGparityFlavourVector<vComplexD>         vGparityFlavourVectorD; |  | ||||||
|  |  | ||||||
|      |  | ||||||
| // singlets | // singlets | ||||||
| typedef iSinglet<Complex >         TComplex;     // FIXME This is painful. Tensor singlet complex type. | typedef iSinglet<Complex >         TComplex;     // FIXME This is painful. Tensor singlet complex type. | ||||||
| typedef iSinglet<ComplexF>         TComplexF;    // FIXME This is painful. Tensor singlet complex type. | typedef iSinglet<ComplexF>         TComplexF;    // FIXME This is painful. Tensor singlet complex type. | ||||||
| @@ -298,10 +263,6 @@ typedef Lattice<vLorentzColourMatrix>  LatticeLorentzColourMatrix; | |||||||
| typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF; | typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF; | ||||||
| typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD; | typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD; | ||||||
|  |  | ||||||
| typedef Lattice<vLorentzVector>  LatticeLorentzVector; |  | ||||||
| typedef Lattice<vLorentzVectorF> LatticeLorentzVectorF; |  | ||||||
| typedef Lattice<vLorentzVectorD> LatticeLorentzVectorD; |  | ||||||
|  |  | ||||||
| // DoubleStored gauge field | // DoubleStored gauge field | ||||||
| typedef Lattice<vDoubleStoredColourMatrix>  LatticeDoubleStoredColourMatrix; | typedef Lattice<vDoubleStoredColourMatrix>  LatticeDoubleStoredColourMatrix; | ||||||
| typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF; | typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF; | ||||||
|   | |||||||
| @@ -30,7 +30,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #pragma once | #ifndef GRID_QCD_ACTION_H | ||||||
|  | #define GRID_QCD_ACTION_H | ||||||
|  |  | ||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| // Abstract base interface | // Abstract base interface | ||||||
| @@ -50,4 +51,4 @@ NAMESPACE_CHECK(Fermion); | |||||||
| #include <Grid/qcd/action/pseudofermion/PseudoFermion.h> | #include <Grid/qcd/action/pseudofermion/PseudoFermion.h> | ||||||
| NAMESPACE_CHECK(PseudoFermion); | NAMESPACE_CHECK(PseudoFermion); | ||||||
|  |  | ||||||
|  | #endif | ||||||
|   | |||||||
| @@ -40,29 +40,6 @@ class Action | |||||||
|  |  | ||||||
| public: | public: | ||||||
|   bool is_smeared = false; |   bool is_smeared = false; | ||||||
|   RealD deriv_norm_sum; |  | ||||||
|   RealD deriv_max_sum; |  | ||||||
|   int   deriv_num; |  | ||||||
|   RealD deriv_us; |  | ||||||
|   RealD S_us; |  | ||||||
|   RealD refresh_us; |  | ||||||
|   void  reset_timer(void)        { |  | ||||||
|     deriv_us = S_us = refresh_us = 0.0; |  | ||||||
|     deriv_num=0; |  | ||||||
|     deriv_norm_sum = deriv_max_sum=0.0; |  | ||||||
|   } |  | ||||||
|   void  deriv_log(RealD nrm, RealD max) { deriv_max_sum+=max; deriv_norm_sum+=nrm; deriv_num++;} |  | ||||||
|   RealD deriv_max_average(void)         { return deriv_max_sum/deriv_num; }; |  | ||||||
|   RealD deriv_norm_average(void)        { return deriv_norm_sum/deriv_num; }; |  | ||||||
|   RealD deriv_timer(void)        { return deriv_us; }; |  | ||||||
|   RealD S_timer(void)            { return deriv_us; }; |  | ||||||
|   RealD refresh_timer(void)      { return deriv_us; }; |  | ||||||
|   void deriv_timer_start(void)   { deriv_us-=usecond(); } |  | ||||||
|   void deriv_timer_stop(void)    { deriv_us+=usecond(); } |  | ||||||
|   void refresh_timer_start(void) { refresh_us-=usecond(); } |  | ||||||
|   void refresh_timer_stop(void)  { refresh_us+=usecond(); } |  | ||||||
|   void S_timer_start(void)       { S_us-=usecond(); } |  | ||||||
|   void S_timer_stop(void)        { S_us+=usecond(); } |  | ||||||
|   // Heatbath? |   // Heatbath? | ||||||
|   virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions |   virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions | ||||||
|   virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action |   virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action | ||||||
|   | |||||||
| @@ -58,8 +58,6 @@ NAMESPACE_CHECK(Scalar); | |||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| // Utility functions | // Utility functions | ||||||
| //////////////////////////////////////////// | //////////////////////////////////////////// | ||||||
| #include <Grid/qcd/action/domains/Domains.h> |  | ||||||
|  |  | ||||||
| #include <Grid/qcd/utils/Metric.h> | #include <Grid/qcd/utils/Metric.h> | ||||||
| NAMESPACE_CHECK(Metric); | NAMESPACE_CHECK(Metric); | ||||||
| #include <Grid/qcd/utils/CovariantLaplacian.h> | #include <Grid/qcd/utils/CovariantLaplacian.h> | ||||||
|   | |||||||
| @@ -36,34 +36,28 @@ NAMESPACE_BEGIN(Grid); | |||||||
|  |  | ||||||
| // These can move into a params header and be given MacroMagic serialisation | // These can move into a params header and be given MacroMagic serialisation | ||||||
| struct GparityWilsonImplParams { | struct GparityWilsonImplParams { | ||||||
|   Coordinate twists; //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  |   Coordinate twists; | ||||||
|                      //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs |   GparityWilsonImplParams() : twists(Nd, 0) {}; | ||||||
|   bool locally_periodic; |  | ||||||
|   GparityWilsonImplParams() : twists(Nd, 0), locally_periodic(false) {}; |  | ||||||
| }; | }; | ||||||
|    |    | ||||||
| struct WilsonImplParams { | struct WilsonImplParams { | ||||||
|   bool overlapCommsCompute; |   bool overlapCommsCompute; | ||||||
|   bool locally_periodic; |  | ||||||
|   AcceleratorVector<Real,Nd> twist_n_2pi_L; |   AcceleratorVector<Real,Nd> twist_n_2pi_L; | ||||||
|   AcceleratorVector<Complex,Nd> boundary_phases; |   AcceleratorVector<Complex,Nd> boundary_phases; | ||||||
|   WilsonImplParams()  { |   WilsonImplParams()  { | ||||||
|     boundary_phases.resize(Nd, 1.0); |     boundary_phases.resize(Nd, 1.0); | ||||||
|       twist_n_2pi_L.resize(Nd, 0.0); |       twist_n_2pi_L.resize(Nd, 0.0); | ||||||
|       locally_periodic = false; |  | ||||||
|   }; |   }; | ||||||
|   WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) { |   WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) { | ||||||
|     twist_n_2pi_L.resize(Nd, 0.0); |     twist_n_2pi_L.resize(Nd, 0.0); | ||||||
|     locally_periodic = false; |  | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
| struct StaggeredImplParams { | struct StaggeredImplParams { | ||||||
|   bool locally_periodic; |   StaggeredImplParams()  {}; | ||||||
|   StaggeredImplParams() : locally_periodic(false) {}; |  | ||||||
| }; | }; | ||||||
|    |    | ||||||
| struct OneFlavourRationalParams : Serializable { |   struct OneFlavourRationalParams : Serializable { | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,  |     GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,  | ||||||
| 				    RealD, lo,  | 				    RealD, lo,  | ||||||
| 				    RealD, hi,  | 				    RealD, hi,  | ||||||
| @@ -92,50 +86,6 @@ struct OneFlavourRationalParams : Serializable { | |||||||
|         BoundsCheckFreq(_BoundsCheckFreq){}; |         BoundsCheckFreq(_BoundsCheckFreq){}; | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
|  |  | ||||||
|   /*Action parameters for the generalized rational action |  | ||||||
|     The approximation is for (M^dag M)^{1/inv_pow} |  | ||||||
|     where inv_pow is the denominator of the fractional power. |  | ||||||
|     Default inv_pow=2 for square root, making this equivalent to  |  | ||||||
|     the OneFlavourRational action |  | ||||||
|   */ |  | ||||||
|     struct RationalActionParams : Serializable { |  | ||||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(RationalActionParams,  |  | ||||||
| 				    int, inv_pow,  |  | ||||||
| 				    RealD, lo, //low eigenvalue bound of rational approx |  | ||||||
| 				    RealD, hi, //high eigenvalue bound of rational approx |  | ||||||
| 				    int,   MaxIter,  //maximum iterations in msCG |  | ||||||
| 				    RealD, action_tolerance,  //msCG tolerance in action evaluation |  | ||||||
| 				    int,   action_degree, //rational approx tolerance in action evaluation |  | ||||||
| 				    RealD, md_tolerance,  //msCG tolerance in MD integration |  | ||||||
| 				    int,   md_degree, //rational approx tolerance in MD integration |  | ||||||
| 				    int,   precision, //precision of floating point arithmetic |  | ||||||
| 				    int,   BoundsCheckFreq); //frequency the approximation is tested (with Metropolis degree/tolerance); 0 disables the check |  | ||||||
|   // constructor  |  | ||||||
|   RationalActionParams(int _inv_pow = 2, |  | ||||||
| 		       RealD _lo      = 0.0,  |  | ||||||
| 		       RealD _hi      = 1.0,  |  | ||||||
| 		       int _maxit     = 1000, |  | ||||||
| 		       RealD _action_tolerance      = 1.0e-8,  |  | ||||||
| 		       int _action_degree    = 10, |  | ||||||
| 		       RealD _md_tolerance      = 1.0e-8,  |  | ||||||
| 		       int _md_degree    = 10, |  | ||||||
| 		       int _precision = 64, |  | ||||||
| 		       int _BoundsCheckFreq=20) |  | ||||||
|     : inv_pow(_inv_pow),  |  | ||||||
|       lo(_lo), |  | ||||||
|       hi(_hi), |  | ||||||
|       MaxIter(_maxit), |  | ||||||
|       action_tolerance(_action_tolerance), |  | ||||||
|       action_degree(_action_degree), |  | ||||||
|       md_tolerance(_md_tolerance), |  | ||||||
|       md_degree(_md_degree), |  | ||||||
|       precision(_precision), |  | ||||||
|       BoundsCheckFreq(_BoundsCheckFreq){}; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
|    |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -1,98 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/qcd/action/momentum/DirichletFilter.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2021 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| 			   /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////// |  | ||||||
| // Dirichlet filter with sub-block size B[mu] |  | ||||||
| //////////////////////////////////////////////////// |  | ||||||
| #pragma once  |  | ||||||
|  |  | ||||||
| #include <Grid/qcd/action/domains/DomainDecomposition.h> |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template<typename MomentaField> |  | ||||||
| struct DirichletFilter: public MomentumFilterBase<MomentaField> |  | ||||||
| { |  | ||||||
|   Coordinate Block; |  | ||||||
|    |  | ||||||
|   DirichletFilter(const Coordinate &_Block): Block(_Block) {} |  | ||||||
|  |  | ||||||
|   // Edge detect using domain projectors |  | ||||||
|   void applyFilter (MomentaField &U) const override |  | ||||||
|   { |  | ||||||
|     DomainDecomposition Domains(Block); |  | ||||||
|     GridBase *grid = U.Grid(); |  | ||||||
|     LatticeInteger  coor(grid); |  | ||||||
|     LatticeInteger  face(grid); |  | ||||||
|     LatticeInteger  one(grid);   one = 1; |  | ||||||
|     LatticeInteger  zero(grid); zero = 0; |  | ||||||
|     LatticeInteger  omega(grid); |  | ||||||
|     LatticeInteger  omegabar(grid); |  | ||||||
|     LatticeInteger  tmp(grid); |  | ||||||
|  |  | ||||||
|     omega=one;    Domains.ProjectDomain(omega,0); |  | ||||||
|     omegabar=one; Domains.ProjectDomain(omegabar,1); |  | ||||||
|      |  | ||||||
|     LatticeInteger nface(grid); nface=Zero(); |  | ||||||
|      |  | ||||||
|     MomentaField projected(grid); projected=Zero(); |  | ||||||
|     typedef decltype(PeekIndex<LorentzIndex>(U,0)) MomentaLinkField; |  | ||||||
|     MomentaLinkField  Umu(grid); |  | ||||||
|     MomentaLinkField   zz(grid); zz=Zero(); |  | ||||||
|  |  | ||||||
|     int dims = grid->Nd(); |  | ||||||
|     Coordinate Global=grid->GlobalDimensions(); |  | ||||||
|     assert(dims==Nd); |  | ||||||
|  |  | ||||||
|     for(int mu=0;mu<Nd;mu++){ |  | ||||||
|  |  | ||||||
|       if ( Block[mu]!=0 ) { |  | ||||||
|  |  | ||||||
| 	Umu = PeekIndex<LorentzIndex>(U,mu); |  | ||||||
|  |  | ||||||
| 	// Upper face  |  | ||||||
|  	tmp = Cshift(omegabar,mu,1); |  | ||||||
| 	tmp = tmp + omega; |  | ||||||
| 	face = where(tmp == Integer(2),one,zero ); |  | ||||||
|  |  | ||||||
|  	tmp = Cshift(omega,mu,1); |  | ||||||
| 	tmp = tmp + omegabar; |  | ||||||
| 	face = where(tmp == Integer(2),one,face ); |  | ||||||
|  |  | ||||||
| 	Umu = where(face,zz,Umu); |  | ||||||
|  |  | ||||||
| 	PokeIndex<LorentzIndex>(U, Umu, mu); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|    |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -1,187 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
| Grid physics library, www.github.com/paboyle/Grid |  | ||||||
|  |  | ||||||
| Source file: ./lib/qcd/action/domains/DomainDecomposition.h |  | ||||||
|  |  | ||||||
| Copyright (C) 2021 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify |  | ||||||
| it under the terms of the GNU General Public License as published by |  | ||||||
| the Free Software Foundation; either version 2 of the License, or |  | ||||||
| (at your option) any later version. |  | ||||||
|  |  | ||||||
| This program is distributed in the hope that it will be useful, |  | ||||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
| GNU General Public License for more details. |  | ||||||
|  |  | ||||||
| You should have received a copy of the GNU General Public License along |  | ||||||
| with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
| See the full license in the file "LICENSE" in the top level distribution |  | ||||||
| directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| 			   /*  END LEGAL */ |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////// |  | ||||||
| // Dirichlet filter with sub-block size B[mu] |  | ||||||
| //////////////////////////////////////////////////// |  | ||||||
| #pragma once  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
|  |  | ||||||
| struct DomainDecomposition |  | ||||||
| { |  | ||||||
|   Coordinate Block; |  | ||||||
|   static constexpr RealD factor = 0.6; |  | ||||||
|  |  | ||||||
|   DomainDecomposition(const Coordinate &_Block): Block(_Block){ assert(Block.size()==Nd);}; |  | ||||||
|    |  | ||||||
|   template<class Field> |  | ||||||
|   void ProjectDomain(Field &f,Integer domain) |  | ||||||
|   { |  | ||||||
|     GridBase *grid = f.Grid(); |  | ||||||
|     int dims = grid->Nd(); |  | ||||||
|     int isDWF= (dims==Nd+1); |  | ||||||
|     assert((dims==Nd)||(dims==Nd+1)); |  | ||||||
|  |  | ||||||
|     Field   zz(grid);  zz = Zero(); |  | ||||||
|     LatticeInteger coor(grid); |  | ||||||
|     LatticeInteger domaincoor(grid); |  | ||||||
|     LatticeInteger mask(grid); mask = Integer(1); |  | ||||||
|     LatticeInteger zi(grid);     zi = Integer(0); |  | ||||||
|     for(int d=0;d<Nd;d++){ |  | ||||||
|       Integer B= Block[d]; |  | ||||||
|       if ( B ) { |  | ||||||
| 	LatticeCoordinate(coor,d+isDWF); |  | ||||||
| 	domaincoor = mod(coor,B); |  | ||||||
| 	mask = where(domaincoor==Integer(0),zi,mask); |  | ||||||
| 	mask = where(domaincoor==Integer(B-1),zi,mask); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     if ( !domain ) |  | ||||||
|       f = where(mask==Integer(1),f,zz); |  | ||||||
|     else  |  | ||||||
|       f = where(mask==Integer(0),f,zz); |  | ||||||
|   }; |  | ||||||
|   template<class GaugeField> |  | ||||||
|   void ProjectDDHMC(GaugeField &U) |  | ||||||
|   { |  | ||||||
|     GridBase *grid = U.Grid(); |  | ||||||
|     Coordinate Global=grid->GlobalDimensions(); |  | ||||||
|     GaugeField zzz(grid); zzz = Zero(); |  | ||||||
|     LatticeInteger coor(grid);  |  | ||||||
|  |  | ||||||
|     GaugeField Uorg(grid); Uorg = U; |  | ||||||
|      |  | ||||||
|     auto zzz_mu = PeekIndex<LorentzIndex>(zzz,0); |  | ||||||
|     //////////////////////////////////////////////////// |  | ||||||
|     // Zero BDY layers |  | ||||||
|     //////////////////////////////////////////////////// |  | ||||||
|     for(int mu=0;mu<Nd;mu++) { |  | ||||||
|       Integer B1 = Block[mu]; |  | ||||||
|       if ( B1 && (B1 <= Global[mu]) ) { |  | ||||||
| 	LatticeCoordinate(coor,mu); |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	//////////////////////////////// |  | ||||||
| 	// OmegaBar - zero all links contained in slice B-1,0 and |  | ||||||
| 	// mu links connecting to Omega |  | ||||||
| 	//////////////////////////////// |  | ||||||
|  |  | ||||||
| 	U    = where(mod(coor,B1)==Integer(B1-1),zzz,U); |  | ||||||
| 	U    = where(mod(coor,B1)==Integer(0)   ,zzz,U);  |  | ||||||
|  |  | ||||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); |  | ||||||
| 	U_mu = where(mod(coor,B1)==Integer(B1-2),zzz_mu,U_mu);  |  | ||||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); |  | ||||||
|  |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     |  | ||||||
|     //////////////////////////////////////////// |  | ||||||
|     // Omega interior slow the evolution |  | ||||||
|     // Tricky as we need to take the smallest of values imposed by each cut |  | ||||||
|     // Do them in order or largest to smallest and smallest writes last |  | ||||||
|     //////////////////////////////////////////// |  | ||||||
|     RealD f= factor; |  | ||||||
| #if 0     |  | ||||||
|     for(int mu=0;mu<Nd;mu++) { |  | ||||||
|       Integer B1 = Block[mu]; |  | ||||||
|       if ( B1 && (B1 <= Global[mu]) ) { |  | ||||||
|  |  | ||||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); |  | ||||||
| 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); |  | ||||||
| 	// In the plane |  | ||||||
| 	U = where(mod(coor,B1)==Integer(B1-5),Uorg*f,U);  |  | ||||||
| 	U = where(mod(coor,B1)==Integer(4)   ,Uorg*f,U);  |  | ||||||
|  |  | ||||||
| 	// Perp links |  | ||||||
|        	U_mu = where(mod(coor,B1)==Integer(B1-6),Uorg_mu*f,U_mu); |  | ||||||
| 	U_mu = where(mod(coor,B1)==Integer(4)   ,Uorg_mu*f,U_mu); |  | ||||||
|  |  | ||||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
| #endif |  | ||||||
|     for(int mu=0;mu<Nd;mu++) { |  | ||||||
|       Integer B1 = Block[mu]; |  | ||||||
|       if ( B1 && (B1 <= Global[mu]) ) { |  | ||||||
|  |  | ||||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); |  | ||||||
| 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); |  | ||||||
| 	// In the plane |  | ||||||
| 	U = where(mod(coor,B1)==Integer(B1-4),Uorg*f*f,U);  |  | ||||||
| 	U = where(mod(coor,B1)==Integer(3)   ,Uorg*f*f,U);  |  | ||||||
|  |  | ||||||
| 	// Perp links |  | ||||||
|        	U_mu = where(mod(coor,B1)==Integer(B1-5),Uorg_mu*f*f,U_mu); |  | ||||||
| 	U_mu = where(mod(coor,B1)==Integer(3)   ,Uorg_mu*f*f,U_mu); |  | ||||||
|  |  | ||||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     for(int mu=0;mu<Nd;mu++) { |  | ||||||
|       Integer B1 = Block[mu]; |  | ||||||
|       if ( B1 && (B1 <= Global[mu]) ) { |  | ||||||
|  |  | ||||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); |  | ||||||
| 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); |  | ||||||
| 	// In the plane |  | ||||||
| 	U = where(mod(coor,B1)==Integer(B1-3),Uorg*f*f*f,U);  |  | ||||||
| 	U = where(mod(coor,B1)==Integer(2)   ,Uorg*f*f*f,U);  |  | ||||||
|  |  | ||||||
| 	// Perp links |  | ||||||
|        	U_mu = where(mod(coor,B1)==Integer(B1-4),Uorg_mu*f*f*f,U_mu); |  | ||||||
| 	U_mu = where(mod(coor,B1)==Integer(2)   ,Uorg_mu*f*f*f,U_mu); |  | ||||||
|  |  | ||||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     for(int mu=0;mu<Nd;mu++) { |  | ||||||
|       Integer B1 = Block[mu]; |  | ||||||
|       if ( B1 && (B1 <= Global[mu]) ) { |  | ||||||
|  |  | ||||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); |  | ||||||
| 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); |  | ||||||
| 	// In the plane |  | ||||||
| 	U = where(mod(coor,B1)==Integer(B1-2),zzz,U);  |  | ||||||
| 	U = where(mod(coor,B1)==Integer(1)   ,zzz,U);  |  | ||||||
|  |  | ||||||
| 	// Perp links |  | ||||||
| 	U_mu = where(mod(coor,B1)==Integer(B1-3),Uorg_mu*f*f*f*f,U_mu); |  | ||||||
| 	U_mu = where(mod(coor,B1)==Integer(1)   ,Uorg_mu*f*f*f*f,U_mu); |  | ||||||
|  |  | ||||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
| @@ -60,8 +60,6 @@ public: | |||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   virtual void Dminus(const FermionField &psi, FermionField &chi); |   virtual void Dminus(const FermionField &psi, FermionField &chi); | ||||||
|   virtual void DminusDag(const FermionField &psi, FermionField &chi); |   virtual void DminusDag(const FermionField &psi, FermionField &chi); | ||||||
|   virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported); |  | ||||||
|   virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported); |  | ||||||
|   virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); |   virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); | ||||||
|   virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d); |   virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d); | ||||||
|   virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); |   virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); | ||||||
|   | |||||||
| @@ -1,185 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/qcd/action/fermion/DirichletFermionOperator.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2021 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////// |  | ||||||
| // Wrap a fermion operator in Dirichlet BC's at node boundary |  | ||||||
| //////////////////////////////////////////////////////////////// |  | ||||||
|      |  | ||||||
| template<class Impl> |  | ||||||
| class DirichletFermionOperator : public FermionOperator<Impl> |  | ||||||
| { |  | ||||||
| public: |  | ||||||
|  |  | ||||||
|   INHERIT_IMPL_TYPES(Impl); |  | ||||||
|  |  | ||||||
|   // Data members |  | ||||||
|   int CommsMode; |  | ||||||
|   Coordinate Block; |  | ||||||
|   DirichletFilter<GaugeField> Filter; |  | ||||||
|   FermionOperator<Impl> & FermOp; |  | ||||||
|    |  | ||||||
|   // Constructor / bespoke |  | ||||||
|   DirichletFermionOperator(FermionOperator<Impl> & _FermOp, Coordinate &_Block) |  | ||||||
|     : FermOp(_FermOp), Block(_Block), Filter(Block) |  | ||||||
|   { |  | ||||||
|     // Save what the comms mode should be under normal BCs |  | ||||||
|     CommsMode = WilsonKernelsStatic::Comms; |  | ||||||
|     assert((CommsMode == WilsonKernelsStatic::CommsAndCompute) |  | ||||||
|          ||(CommsMode == WilsonKernelsStatic::CommsThenCompute)); |  | ||||||
|  |  | ||||||
|     // Check the block size divides local lattice |  | ||||||
|     GridBase *grid = FermOp.GaugeGrid(); |  | ||||||
|  |  | ||||||
|     int blocks_per_rank = 1; |  | ||||||
|     Coordinate LocalDims = grid->LocalDimensions(); |  | ||||||
|     Coordinate GlobalDims= grid->GlobalDimensions(); |  | ||||||
|     assert(Block.size()==LocalDims.size()); |  | ||||||
|  |  | ||||||
|     for(int d=0;d<LocalDims.size();d++){ |  | ||||||
|       if (Block[d]&&(Block[d]<=GlobalDims[d])){ |  | ||||||
| 	int r = LocalDims[d] % Block[d]; |  | ||||||
| 	assert(r == 0); |  | ||||||
| 	blocks_per_rank *= (LocalDims[d] / Block[d]); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     // Even blocks per node required // could be relaxed but inefficient use of hardware as idle nodes in boundary operator R |  | ||||||
|     assert( blocks_per_rank != 0); |  | ||||||
|  |  | ||||||
|     // Possible checks that SIMD lanes are used with full occupancy??? |  | ||||||
|   }; |  | ||||||
|   virtual ~DirichletFermionOperator(void) = default; |  | ||||||
|  |  | ||||||
|   void DirichletOn(void)   { |  | ||||||
|     assert(WilsonKernelsStatic::Comms!= WilsonKernelsStatic::CommsDirichlet); |  | ||||||
|     //    WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsDirichlet; |  | ||||||
|   } |  | ||||||
|   void DirichletOff(void)  { |  | ||||||
|     //    assert(WilsonKernelsStatic::Comms== WilsonKernelsStatic::CommsDirichlet); |  | ||||||
|     //    WilsonKernelsStatic::Comms = CommsMode; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Implement the full interface |  | ||||||
|   virtual FermionField &tmp(void) { return FermOp.tmp(); }; |  | ||||||
|  |  | ||||||
|   virtual GridBase *FermionGrid(void)         { return FermOp.FermionGrid(); } |  | ||||||
|   virtual GridBase *FermionRedBlackGrid(void) { return FermOp.FermionRedBlackGrid(); } |  | ||||||
|   virtual GridBase *GaugeGrid(void)           { return FermOp.GaugeGrid(); } |  | ||||||
|   virtual GridBase *GaugeRedBlackGrid(void)   { return FermOp.GaugeRedBlackGrid(); } |  | ||||||
|    |  | ||||||
|   // override multiply |  | ||||||
|   virtual void  M    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.M(in,out);    DirichletOff();  }; |  | ||||||
|   virtual void  Mdag (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Mdag(in,out); DirichletOff();  }; |  | ||||||
|  |  | ||||||
|   // half checkerboard operaions |  | ||||||
|   virtual void   Meooe       (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Meooe(in,out);    DirichletOff(); };   |  | ||||||
|   virtual void   MeooeDag    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MeooeDag(in,out); DirichletOff(); }; |  | ||||||
|   virtual void   Mooee       (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Mooee(in,out);    DirichletOff(); }; |  | ||||||
|   virtual void   MooeeDag    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeDag(in,out); DirichletOff(); }; |  | ||||||
|   virtual void   MooeeInv    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeInv(in,out); DirichletOff(); }; |  | ||||||
|   virtual void   MooeeInvDag (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeInvDag(in,out); DirichletOff(); }; |  | ||||||
|  |  | ||||||
|   // non-hermitian hopping term; half cb or both |  | ||||||
|   virtual void Dhop  (const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.Dhop(in,out,dag);    DirichletOff(); }; |  | ||||||
|   virtual void DhopOE(const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.DhopOE(in,out,dag);  DirichletOff(); }; |  | ||||||
|   virtual void DhopEO(const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.DhopEO(in,out,dag);  DirichletOff(); }; |  | ||||||
|   virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp) { DirichletOn(); FermOp.DhopDir(in,out,dir,disp);  DirichletOff(); }; |  | ||||||
|  |  | ||||||
|   // force terms; five routines; default to Dhop on diagonal |  | ||||||
|   virtual void MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MDeriv(mat,U,V,dag);}; |  | ||||||
|   virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MoeDeriv(mat,U,V,dag);}; |  | ||||||
|   virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MeoDeriv(mat,U,V,dag);}; |  | ||||||
|   virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MooDeriv(mat,U,V,dag);}; |  | ||||||
|   virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MeeDeriv(mat,U,V,dag);}; |  | ||||||
|  |  | ||||||
|   virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDeriv(mat,U,V,dag);}; |  | ||||||
|   virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDerivEO(mat,U,V,dag);}; |  | ||||||
|   virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDerivOE(mat,U,V,dag);}; |  | ||||||
|  |  | ||||||
|   virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);}; |  | ||||||
|   virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp){FermOp.Mdir(in,out,dir,disp);}; |  | ||||||
|   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)    {FermOp.MdirAll(in,out);}; |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////// |  | ||||||
|   // Updates gauge field during HMC |  | ||||||
|   /////////////////////////////////////////////// |  | ||||||
|   DoubledGaugeField &GetDoubledGaugeField(void){ return FermOp.GetDoubledGaugeField(); }; |  | ||||||
|   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return FermOp.GetDoubledGaugeFieldE(); }; |  | ||||||
|   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return FermOp.GetDoubledGaugeFieldO(); }; |  | ||||||
|   virtual void ImportGauge(const GaugeField & _U) |  | ||||||
|   { |  | ||||||
|     GaugeField U = _U; |  | ||||||
|     // Filter gauge field to apply Dirichlet |  | ||||||
|     Filter.applyFilter(U); |  | ||||||
|     FermOp.ImportGauge(U); |  | ||||||
|   } |  | ||||||
|   /////////////////////////////////////////////// |  | ||||||
|   // Physical field import/export |  | ||||||
|   /////////////////////////////////////////////// |  | ||||||
|   virtual void Dminus(const FermionField &psi, FermionField &chi)    { FermOp.Dminus(psi,chi); } |  | ||||||
|   virtual void DminusDag(const FermionField &psi, FermionField &chi) { FermOp.DminusDag(psi,chi); } |  | ||||||
|   virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported)   { FermOp.ImportFourDimPseudoFermion(input,imported);} |  | ||||||
|   virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported){ FermOp.ExportFourDimPseudoFermion(solution,exported);} |  | ||||||
|   virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported)  { FermOp.ImportPhysicalFermionSource(input,imported);} |  | ||||||
|   virtual void ImportUnphysicalFermion(const FermionField &input,FermionField &imported)      { FermOp.ImportUnphysicalFermion(input,imported);} |  | ||||||
|   virtual void ExportPhysicalFermionSolution(const FermionField &solution,FermionField &exported) {FermOp.ExportPhysicalFermionSolution(solution,exported);} |  | ||||||
|   virtual void ExportPhysicalFermionSource(const FermionField &solution,FermionField &exported)   {FermOp.ExportPhysicalFermionSource(solution,exported);} |  | ||||||
|   ////////////////////////////////////////////////////////////////////// |  | ||||||
|   // Should never be used |  | ||||||
|   ////////////////////////////////////////////////////////////////////// |  | ||||||
|   virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; |  | ||||||
|   virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {assert(0);} |  | ||||||
|   virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) { assert(0);} |  | ||||||
|   virtual void ContractConservedCurrent(PropagatorField &q_in_1, |  | ||||||
| 					PropagatorField &q_in_2, |  | ||||||
| 					PropagatorField &q_out, |  | ||||||
| 					PropagatorField &phys_src, |  | ||||||
| 					Current curr_type, |  | ||||||
| 					unsigned int mu) |  | ||||||
|   {assert(0);}; |  | ||||||
|   virtual void SeqConservedCurrent(PropagatorField &q_in,  |  | ||||||
| 				   PropagatorField &q_out, |  | ||||||
| 				   PropagatorField &phys_src, |  | ||||||
| 				   Current curr_type, |  | ||||||
| 				   unsigned int mu, |  | ||||||
| 				   unsigned int tmin,  |  | ||||||
| 				   unsigned int tmax, |  | ||||||
| 				   ComplexField &lattice_cmplx) |  | ||||||
|   {assert(0);}; |  | ||||||
|       // Only reimplemented in Wilson5D  |  | ||||||
|       // Default to just a zero correlation function |  | ||||||
|   virtual void ContractJ5q(FermionField &q_in   ,ComplexField &J5q) { J5q=Zero(); }; |  | ||||||
|   virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=Zero(); }; |  | ||||||
|    |  | ||||||
| }; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
|  |  | ||||||
| @@ -101,12 +101,6 @@ NAMESPACE_CHECK(WilsonTM5); | |||||||
| #include <Grid/qcd/action/fermion/PauliVillarsInverters.h> | #include <Grid/qcd/action/fermion/PauliVillarsInverters.h> | ||||||
| #include <Grid/qcd/action/fermion/Reconstruct5Dprop.h> | #include <Grid/qcd/action/fermion/Reconstruct5Dprop.h> | ||||||
| #include <Grid/qcd/action/fermion/MADWF.h> | #include <Grid/qcd/action/fermion/MADWF.h> | ||||||
| //////////////////////////////////////////////////////////////////// |  | ||||||
| // DDHMC related  |  | ||||||
| //////////////////////////////////////////////////////////////////// |  | ||||||
| #include <Grid/qcd/action/fermion/DirichletFermionOperator.h> |  | ||||||
| #include <Grid/qcd/action/fermion/SchurFactoredFermionOperator.h> |  | ||||||
|  |  | ||||||
| NAMESPACE_CHECK(DWFutils); | NAMESPACE_CHECK(DWFutils); | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -121,9 +115,9 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR; | |||||||
| typedef WilsonFermion<WilsonImplF> WilsonFermionF; | typedef WilsonFermion<WilsonImplF> WilsonFermionF; | ||||||
| typedef WilsonFermion<WilsonImplD> WilsonFermionD; | typedef WilsonFermion<WilsonImplD> WilsonFermionD; | ||||||
|  |  | ||||||
| //typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; | typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; | ||||||
| //typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; | typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; | ||||||
| //typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; | typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; | ||||||
|  |  | ||||||
| typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; | typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; | ||||||
| typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; | typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; | ||||||
| @@ -164,41 +158,41 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR; | |||||||
| typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; | typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; | ||||||
| typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; | typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; | ||||||
|  |  | ||||||
| //typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; | typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; | ||||||
| //typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; | typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; | ||||||
| //typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; | typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; | typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; | typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; | ||||||
| typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; | typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; | ||||||
|  |  | ||||||
| //typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; | typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; | ||||||
| //typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; | typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; | ||||||
| //typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; | typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusFermion<WilsonImplR> MobiusFermionR; | typedef MobiusFermion<WilsonImplR> MobiusFermionR; | ||||||
| typedef MobiusFermion<WilsonImplF> MobiusFermionF; | typedef MobiusFermion<WilsonImplF> MobiusFermionF; | ||||||
| typedef MobiusFermion<WilsonImplD> MobiusFermionD; | typedef MobiusFermion<WilsonImplD> MobiusFermionD; | ||||||
|  |  | ||||||
| //typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; | typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; | ||||||
| //typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; | typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; | ||||||
| //typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; | typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; | typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; | ||||||
| typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; | typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; | ||||||
| typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; | typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; | ||||||
|  |  | ||||||
| //typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; | typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; | ||||||
| //typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; | typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; | ||||||
| //typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; | typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; | typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; | ||||||
| typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; | typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; | ||||||
| typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; | typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; | ||||||
|  |  | ||||||
| //typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; | typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; | ||||||
| //typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; | typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; | ||||||
| //typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; | typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; | ||||||
|  |  | ||||||
| // Ls vectorised | // Ls vectorised | ||||||
| typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; | typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; | ||||||
| @@ -241,49 +235,49 @@ typedef WilsonFermion<GparityWilsonImplR>     GparityWilsonFermionR; | |||||||
| typedef WilsonFermion<GparityWilsonImplF>     GparityWilsonFermionF; | typedef WilsonFermion<GparityWilsonImplF>     GparityWilsonFermionF; | ||||||
| typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD; | typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD; | ||||||
|  |  | ||||||
| //typedef WilsonFermion<GparityWilsonImplRL>     GparityWilsonFermionRL; | typedef WilsonFermion<GparityWilsonImplRL>     GparityWilsonFermionRL; | ||||||
| //typedef WilsonFermion<GparityWilsonImplFH>     GparityWilsonFermionFH; | typedef WilsonFermion<GparityWilsonImplFH>     GparityWilsonFermionFH; | ||||||
| //typedef WilsonFermion<GparityWilsonImplDF>     GparityWilsonFermionDF; | typedef WilsonFermion<GparityWilsonImplDF>     GparityWilsonFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; | typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; | ||||||
| typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; | typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; | ||||||
| typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; | typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; | ||||||
|  |  | ||||||
| //typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; | typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; | ||||||
| //typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; | typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; | ||||||
| //typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; | typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; | ||||||
|  |  | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; | typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; | typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; | ||||||
| typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; | typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; | ||||||
|  |  | ||||||
| //typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; | typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; | ||||||
| //typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; | typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; | ||||||
| //typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; | typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; | typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; | typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; | ||||||
| typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; | typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; | ||||||
|  |  | ||||||
| //typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; | typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; | ||||||
| //typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; | typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; | ||||||
| //typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; | typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; | typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; | ||||||
| typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; | typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; | ||||||
| typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; | typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; | ||||||
|  |  | ||||||
| //typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; | typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; | ||||||
| //typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; | typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; | ||||||
| //typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; | typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; | ||||||
|  |  | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; | typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; | typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; | ||||||
| typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; | typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; | ||||||
|  |  | ||||||
| //typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; | typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; | ||||||
| //typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; | typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; | ||||||
| //typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; | typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; | ||||||
|  |  | ||||||
| typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; | typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; | ||||||
| typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; | typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; | ||||||
|   | |||||||
| @@ -25,7 +25,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | |||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|     *************************************************************************************/ |     *************************************************************************************/ | ||||||
|     /*  END LEGAL */ |     /*  END LEGAL */ | ||||||
| #pragma once | #ifndef  GRID_QCD_FERMION_CORE_H | ||||||
|  | #define  GRID_QCD_FERMION_CORE_H | ||||||
|  |  | ||||||
| #include <Grid/GridCore.h> | #include <Grid/GridCore.h> | ||||||
| #include <Grid/GridQCDcore.h> | #include <Grid/GridQCDcore.h> | ||||||
| @@ -44,3 +45,4 @@ NAMESPACE_CHECK(FermionOperator); | |||||||
| #include <Grid/qcd/action/fermion/StaggeredKernels.h>        //used by all wilson type fermions | #include <Grid/qcd/action/fermion/StaggeredKernels.h>        //used by all wilson type fermions | ||||||
| NAMESPACE_CHECK(Kernels); | NAMESPACE_CHECK(Kernels); | ||||||
|  |  | ||||||
|  | #endif | ||||||
|   | |||||||
| @@ -140,9 +140,6 @@ public: | |||||||
|   // Updates gauge field during HMC |   // Updates gauge field during HMC | ||||||
|   /////////////////////////////////////////////// |   /////////////////////////////////////////////// | ||||||
|   virtual void ImportGauge(const GaugeField & _U)=0; |   virtual void ImportGauge(const GaugeField & _U)=0; | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeField(void)  =0; |  | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void)  =0; |  | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void)  =0; |  | ||||||
|  |  | ||||||
|   ////////////////////////////////////////////////////////////////////// |   ////////////////////////////////////////////////////////////////////// | ||||||
|   // Conserved currents, either contract at sink or insert sequentially. |   // Conserved currents, either contract at sink or insert sequentially. | ||||||
| @@ -174,16 +171,6 @@ public: | |||||||
|       /////////////////////////////////////////////// |       /////////////////////////////////////////////// | ||||||
|       virtual void Dminus(const FermionField &psi, FermionField &chi)    { chi=psi; } |       virtual void Dminus(const FermionField &psi, FermionField &chi)    { chi=psi; } | ||||||
|       virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; } |       virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; } | ||||||
|  |  | ||||||
|       virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported) |  | ||||||
|       { |  | ||||||
| 	imported = input; |  | ||||||
|       }; |  | ||||||
|       virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported) |  | ||||||
|       { |  | ||||||
| 	exported=solution; |  | ||||||
|       }; |  | ||||||
|  |  | ||||||
|       virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported) |       virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported) | ||||||
|       { |       { | ||||||
| 	imported = input; | 	imported = input; | ||||||
|   | |||||||
| @@ -30,18 +30,6 @@ directory | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| /* |  | ||||||
|   Policy implementation for G-parity boundary conditions |  | ||||||
|  |  | ||||||
|   Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular |  | ||||||
|   field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first |  | ||||||
|   with the regular links we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field |  | ||||||
|   to avoid communicating links when applying the Dirac operator, such that the double-stored field contains also a flavor index which maps to |  | ||||||
|   either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor. |  | ||||||
|  |  | ||||||
|   Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  |  | ||||||
|   mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs |  | ||||||
|  */ |  | ||||||
| template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal> | template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal> | ||||||
| class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > { | class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > { | ||||||
| public: | public: | ||||||
| @@ -125,7 +113,7 @@ public: | |||||||
|     || ((distance== 1)&&(icoor[direction]==1)) |     || ((distance== 1)&&(icoor[direction]==1)) | ||||||
|     || ((distance==-1)&&(icoor[direction]==0)); |     || ((distance==-1)&&(icoor[direction]==0)); | ||||||
|  |  | ||||||
|     permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction |     permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world | ||||||
|  |  | ||||||
|     //Apply the links |     //Apply the links | ||||||
|     int f_upper = permute_lane ? 1 : 0; |     int f_upper = permute_lane ? 1 : 0; | ||||||
| @@ -151,10 +139,10 @@ public: | |||||||
|     assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code |     assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code | ||||||
|     assert((sl == 1) || (sl == 2)); |     assert((sl == 1) || (sl == 2)); | ||||||
|  |  | ||||||
|     //If this site is an global boundary site, perform the G-parity flavor twist |     if ( SE->_around_the_world && St.parameters.twists[mmu] ) { | ||||||
|     if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) { |  | ||||||
|       if ( sl == 2 ) { |       if ( sl == 2 ) { | ||||||
| 	//Only do the twist for lanes on the edge of the physical node |         | ||||||
| 	ExtractBuffer<sobj> vals(Nsimd); | 	ExtractBuffer<sobj> vals(Nsimd); | ||||||
|  |  | ||||||
| 	extract(chi,vals); | 	extract(chi,vals); | ||||||
| @@ -209,19 +197,6 @@ public: | |||||||
|     reg = memory; |     reg = memory; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   //Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds |  | ||||||
|   inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){ |  | ||||||
|     autoView(poke_f0_v, poke_f0, CpuRead); |  | ||||||
|     autoView(poke_f1_v, poke_f1, CpuRead); |  | ||||||
|     autoView(Uds_v, Uds, CpuWrite); |  | ||||||
|     thread_foreach(ss,poke_f0_v,{ |  | ||||||
| 	Uds_v[ss](0)(mu) = poke_f0_v[ss](); |  | ||||||
| 	Uds_v[ss](1)(mu) = poke_f1_v[ss](); |  | ||||||
|       }); |  | ||||||
|   } |  | ||||||
|      |  | ||||||
|  |  | ||||||
|   inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) |   inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) | ||||||
|   { |   { | ||||||
|     conformable(Uds.Grid(),GaugeGrid); |     conformable(Uds.Grid(),GaugeGrid); | ||||||
| @@ -233,18 +208,13 @@ public: | |||||||
|     |     | ||||||
|     Lattice<iScalar<vInteger> > coor(GaugeGrid); |     Lattice<iScalar<vInteger> > coor(GaugeGrid); | ||||||
|          |          | ||||||
|     //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  |     for(int mu=0;mu<Nd;mu++){ | ||||||
|     //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs         |  | ||||||
|     for(int mu=0;mu<Nd-1;mu++){ |  | ||||||
|            |            | ||||||
|       if( Params.twists[mu] ){ |  | ||||||
|       LatticeCoordinate(coor,mu); |       LatticeCoordinate(coor,mu); | ||||||
|       } |  | ||||||
|            |            | ||||||
|       U     = PeekIndex<LorentzIndex>(Umu,mu); |       U     = PeekIndex<LorentzIndex>(Umu,mu); | ||||||
|       Uconj = conjugate(U); |       Uconj = conjugate(U); | ||||||
|       |       | ||||||
|       // Implement the isospin rotation sign on the boundary between f=1 and f=0 |  | ||||||
|       // This phase could come from a simple bc 1,1,-1,1 .. |       // This phase could come from a simple bc 1,1,-1,1 .. | ||||||
|       int neglink = GaugeGrid->GlobalDimensions()[mu]-1; |       int neglink = GaugeGrid->GlobalDimensions()[mu]-1; | ||||||
|       if ( Params.twists[mu] ) {  |       if ( Params.twists[mu] ) {  | ||||||
| @@ -290,38 +260,6 @@ public: | |||||||
|         }); |         }); | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     { //periodic / antiperiodic temporal BCs |  | ||||||
|       int mu = Nd-1; |  | ||||||
|       int L   = GaugeGrid->GlobalDimensions()[mu]; |  | ||||||
|       int Lmu = L - 1; |  | ||||||
|  |  | ||||||
|       LatticeCoordinate(coor, mu); |  | ||||||
|  |  | ||||||
|       U = PeekIndex<LorentzIndex>(Umu, mu); //Get t-directed links |  | ||||||
|        |  | ||||||
|       GaugeLinkField *Upoke = &U; |  | ||||||
|  |  | ||||||
|       if(Params.twists[mu]){ //antiperiodic |  | ||||||
| 	Utmp =  where(coor == Lmu, -U, U); |  | ||||||
| 	Upoke = &Utmp; |  | ||||||
|       } |  | ||||||
|      |  | ||||||
|       Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links       |  | ||||||
|       pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu); |  | ||||||
|  |  | ||||||
|       //Get the barrel-shifted field |  | ||||||
|       Utmp = adj(Cshift(U, mu, -1)); //is a forward shift! |  | ||||||
|       Upoke = &Utmp; |  | ||||||
|  |  | ||||||
|       if(Params.twists[mu]){ |  | ||||||
| 	U = where(coor == 0, -Utmp, Utmp);  //boundary phase |  | ||||||
| 	Upoke = &U; |  | ||||||
|       } |  | ||||||
|        |  | ||||||
|       Uconj = conjugate(*Upoke); |  | ||||||
|       pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4); |  | ||||||
|     } |  | ||||||
|   } |   } | ||||||
|        |        | ||||||
|   inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) { |   inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) { | ||||||
| @@ -362,55 +300,35 @@ public: | |||||||
|   } |   } | ||||||
|    |    | ||||||
|   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { |   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { | ||||||
|     int Ls=Btilde.Grid()->_fdimensions[0]; |  | ||||||
|  |  | ||||||
|  |     int Ls = Btilde.Grid()->_fdimensions[0]; | ||||||
|  |          | ||||||
|  |     GaugeLinkField tmp(mat.Grid()); | ||||||
|  |     tmp = Zero(); | ||||||
|     { |     { | ||||||
|       GridBase *GaugeGrid = mat.Grid(); |       autoView( tmp_v , tmp, CpuWrite); | ||||||
|       Lattice<iScalar<vInteger> > coor(GaugeGrid); |       autoView( Atilde_v , Atilde, CpuRead); | ||||||
|  |       autoView( Btilde_v , Btilde, CpuRead); | ||||||
|       if( Params.twists[mu] ){ |       thread_for(ss,tmp.Grid()->oSites(),{ | ||||||
| 	LatticeCoordinate(coor,mu); | 	  for (int s = 0; s < Ls; s++) { | ||||||
|  | 	    int sF = s + Ls * ss; | ||||||
|  | 	    auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF])); | ||||||
|  | 	    tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); | ||||||
| 	  } | 	  } | ||||||
|  |  | ||||||
|       autoView( mat_v , mat, AcceleratorWrite); |  | ||||||
|       autoView( Btilde_v , Btilde, AcceleratorRead); |  | ||||||
|       autoView( Atilde_v , Atilde, AcceleratorRead); |  | ||||||
|       accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{	   |  | ||||||
|   	  int sU=sss; |  | ||||||
|   	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType; |  | ||||||
|   	  ColorMatrixType sum; |  | ||||||
|   	  zeroit(sum); |  | ||||||
|   	  for(int s=0;s<Ls;s++){ |  | ||||||
|   	    int sF = s+Ls*sU; |  | ||||||
|   	    for(int spn=0;spn<Ns;spn++){ //sum over spin |  | ||||||
| 	      //Flavor 0 |  | ||||||
|   	      auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector |  | ||||||
|   	      auto aa = coalescedRead(Atilde_v[sF](0)(spn) ); |  | ||||||
|   	      sum = sum + outerProduct(bb,aa); |  | ||||||
|  |  | ||||||
|   	      //Flavor 1 |  | ||||||
|   	      bb = coalescedRead(Btilde_v[sF](1)(spn) ); |  | ||||||
|   	      aa = coalescedRead(Atilde_v[sF](1)(spn) ); |  | ||||||
|   	      sum = sum + conjugate(outerProduct(bb,aa)); |  | ||||||
|   	    } |  | ||||||
|   	  }	     |  | ||||||
|   	  coalescedWrite(mat_v[sU](mu)(), sum); |  | ||||||
| 	}); | 	}); | ||||||
|     } |     } | ||||||
|  |     PokeIndex<LorentzIndex>(mat, tmp, mu); | ||||||
|  |     return; | ||||||
|   } |   } | ||||||
|    |    | ||||||
|  |  | ||||||
|    |  | ||||||
|  |  | ||||||
|    |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR;  // Real.. whichever prec | typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR;  // Real.. whichever prec | ||||||
| typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF;  // Float | typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF;  // Float | ||||||
| typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD;  // Double | typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD;  // Double | ||||||
|   |   | ||||||
| //typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL;  // Real.. whichever prec | typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL;  // Real.. whichever prec | ||||||
| //typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH;  // Float | typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH;  // Float | ||||||
| //typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF;  // Double | typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF;  // Double | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|   | |||||||
| @@ -141,11 +141,8 @@ public: | |||||||
|   void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat); |   void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat); | ||||||
|   void ImportGaugeSimple(const GaugeField &_UUU    ,const GaugeField &_U); |   void ImportGaugeSimple(const GaugeField &_UUU    ,const GaugeField &_U); | ||||||
|   void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); |   void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeField(void)  override { return Umu; }; |   DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void) override { return UmuEven; }; |   DoubledGaugeField &GetUUU(void) { return UUUmu; }; | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void) override { return UmuOdd; }; |  | ||||||
|   virtual DoubledGaugeField &GetU(void)   { return Umu ; } ; |  | ||||||
|   virtual DoubledGaugeField &GetUUU(void) { return UUUmu; }; |  | ||||||
|   void CopyGaugeCheckerboards(void); |   void CopyGaugeCheckerboards(void); | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -167,9 +167,6 @@ public: | |||||||
|     void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); |     void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); | ||||||
|     // Give a reference; can be used to do an assignment or copy back out after import |     // Give a reference; can be used to do an assignment or copy back out after import | ||||||
|     // if Carleton wants to cache them and not use the ImportSimple |     // if Carleton wants to cache them and not use the ImportSimple | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeField(void)  override { return Umu; }; |  | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void) override { return UmuEven; }; |  | ||||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void) override { return UmuOdd; }; |  | ||||||
|     DoubledGaugeField &GetU(void)   { return Umu ; } ; |     DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||||
|     DoubledGaugeField &GetUUU(void) { return UUUmu; }; |     DoubledGaugeField &GetUUU(void) { return UUUmu; }; | ||||||
|     void CopyGaugeCheckerboards(void); |     void CopyGaugeCheckerboards(void); | ||||||
|   | |||||||
| @@ -135,9 +135,6 @@ public: | |||||||
|  |  | ||||||
|   // DoubleStore impl dependent |   // DoubleStore impl dependent | ||||||
|   void ImportGauge      (const GaugeField &_U ); |   void ImportGauge      (const GaugeField &_U ); | ||||||
|   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; |  | ||||||
|   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; |  | ||||||
|   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; |  | ||||||
|   DoubledGaugeField &GetU(void)   { return Umu ; } ; |   DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||||
|   void CopyGaugeCheckerboards(void); |   void CopyGaugeCheckerboards(void); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,534 +0,0 @@ | |||||||
| /************************************************************************************* |  | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |  | ||||||
|  |  | ||||||
|     Source file: ./lib/qcd/action/fermion/SchurFactoredFermionOperator.h |  | ||||||
|  |  | ||||||
|     Copyright (C) 2021 |  | ||||||
|  |  | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> |  | ||||||
|  |  | ||||||
|     This program is free software; you can redistribute it and/or modify |  | ||||||
|     it under the terms of the GNU General Public License as published by |  | ||||||
|     the Free Software Foundation; either version 2 of the License, or |  | ||||||
|     (at your option) any later version. |  | ||||||
|  |  | ||||||
|     This program is distributed in the hope that it will be useful, |  | ||||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of |  | ||||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |  | ||||||
|     GNU General Public License for more details. |  | ||||||
|  |  | ||||||
|     You should have received a copy of the GNU General Public License along |  | ||||||
|     with this program; if not, write to the Free Software Foundation, Inc., |  | ||||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |  | ||||||
|  |  | ||||||
|     See the full license in the file "LICENSE" in the top level distribution directory |  | ||||||
| *************************************************************************************/ |  | ||||||
| /*  END LEGAL */ |  | ||||||
| #pragma once |  | ||||||
|  |  | ||||||
| #include <Grid/qcd/utils/MixedPrecisionOperatorFunction.h> |  | ||||||
| #include <Grid/qcd/action/domains/Domains.h> |  | ||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); |  | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////// |  | ||||||
|   // Some explanation of class structure for domain decomposition: |  | ||||||
|   // |  | ||||||
|   // Need a dirichlet operator for two flavour determinant - acts on both Omega and OmegaBar. |  | ||||||
|   // |  | ||||||
|   // Possible gain if the global sums and CG are run independently?? Could measure this. |  | ||||||
|   // |  | ||||||
|   // Types of operations |  | ||||||
|   // |  | ||||||
|   // 1) assemble local det dOmega det dOmegaBar pseudofermion |  | ||||||
|   // |  | ||||||
|   // - DirichletFermionOperator - can either do a global solve, or independent/per cell coefficients. |  | ||||||
|   // |  | ||||||
|   // 2) assemble dOmegaInverse and dOmegaBarInverse in R |  | ||||||
|   // |  | ||||||
|   // - DirichletFermionOperator - can also be used to  |  | ||||||
|   //                                       - need two or more cells per node. Options |  | ||||||
|   //                                       - a) solve one cell at a time, no new code, CopyRegion and reduced /split Grids |  | ||||||
|   //                                       - b) solve multiple cells in parallel. predicated dslash implementation |  | ||||||
|   // |  | ||||||
|   //                                       - b) has more parallelism, experience with block solver suggest might not be aalgorithmically inefficient |  | ||||||
|   //                                         a) has more cache friendly and easier code. |  | ||||||
|   //                                         b) is easy to implement in a "trial" or inefficient code with projection. |  | ||||||
|   // |  | ||||||
|   // 3)  Additional functionality for domain operations |  | ||||||
|   // |  | ||||||
|   // - SchurFactoredFermionOperator  - Need a DDHMC utility - whether used in two flavour or one flavour  |  | ||||||
|   // |  | ||||||
|   // - dBoundary - needs non-dirichlet operator |  | ||||||
|   // - Contains one Dirichlet Op, and one non-Dirichlet op. Implements dBoundary etc... |  | ||||||
|   // - The Dirichlet ops can be passed to dOmega(Bar) solvers etc... |  | ||||||
|   // |  | ||||||
|   //////////////////////////////////////////////////////// |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class ImplD,class ImplF> |  | ||||||
| class SchurFactoredFermionOperator : public ImplD |  | ||||||
| { |  | ||||||
|   INHERIT_IMPL_TYPES(ImplD); |  | ||||||
|    |  | ||||||
|   typedef typename ImplF::FermionField FermionFieldF; |  | ||||||
|   typedef typename ImplD::FermionField FermionFieldD; |  | ||||||
|  |  | ||||||
|   typedef SchurDiagMooeeOperator<FermionOperator<ImplD>,FermionFieldD> LinearOperatorD; |  | ||||||
|   typedef SchurDiagMooeeOperator<FermionOperator<ImplF>,FermionFieldF> LinearOperatorF; |  | ||||||
|   typedef SchurDiagMooeeDagOperator<FermionOperator<ImplD>,FermionFieldD> LinearOperatorDagD; |  | ||||||
|   typedef SchurDiagMooeeDagOperator<FermionOperator<ImplF>,FermionFieldF> LinearOperatorDagF; |  | ||||||
|  |  | ||||||
|   typedef MixedPrecisionConjugateGradientOperatorFunction<FermionOperator<ImplD>, |  | ||||||
| 							  FermionOperator<ImplF>, |  | ||||||
| 							  LinearOperatorD, |  | ||||||
| 							  LinearOperatorF> MxPCG; |  | ||||||
|  |  | ||||||
|   typedef MixedPrecisionConjugateGradientOperatorFunction<FermionOperator<ImplD>, |  | ||||||
| 							  FermionOperator<ImplF>, |  | ||||||
| 							  LinearOperatorDagD, |  | ||||||
| 							  LinearOperatorDagF> MxDagPCG; |  | ||||||
| public: |  | ||||||
|  |  | ||||||
|   GridBase *FermionGrid(void) { return PeriodicFermOpD.FermionGrid(); }; |  | ||||||
|   GridBase *GaugeGrid(void)   { return PeriodicFermOpD.GaugeGrid(); }; |  | ||||||
|    |  | ||||||
|   FermionOperator<ImplD> & DirichletFermOpD; |  | ||||||
|   FermionOperator<ImplF> & DirichletFermOpF; |  | ||||||
|   FermionOperator<ImplD> & PeriodicFermOpD;  |  | ||||||
|   FermionOperator<ImplF> & PeriodicFermOpF;  |  | ||||||
|  |  | ||||||
|   LinearOperatorD DirichletLinOpD; |  | ||||||
|   LinearOperatorF DirichletLinOpF; |  | ||||||
|   LinearOperatorD PeriodicLinOpD; |  | ||||||
|   LinearOperatorF PeriodicLinOpF; |  | ||||||
|  |  | ||||||
|   LinearOperatorDagD DirichletLinOpDagD; |  | ||||||
|   LinearOperatorDagF DirichletLinOpDagF; |  | ||||||
|   LinearOperatorDagD PeriodicLinOpDagD; |  | ||||||
|   LinearOperatorDagF PeriodicLinOpDagF; |  | ||||||
|  |  | ||||||
|   // Can tinker with these in the pseudofermion for force vs. action solves |  | ||||||
|   Integer maxinnerit; |  | ||||||
|   Integer maxouterit; |  | ||||||
|   RealD tol; |  | ||||||
|   RealD tolinner; |  | ||||||
|    |  | ||||||
|   Coordinate Block; |  | ||||||
|  |  | ||||||
|   DomainDecomposition Domains; |  | ||||||
|  |  | ||||||
|   SchurFactoredFermionOperator(FermionOperator<ImplD>  & _PeriodicFermOpD, |  | ||||||
| 			       FermionOperator<ImplF>  & _PeriodicFermOpF, |  | ||||||
| 			       FermionOperator<ImplD>  & _DirichletFermOpD, |  | ||||||
| 			       FermionOperator<ImplF>  & _DirichletFermOpF, |  | ||||||
| 			       Coordinate &_Block) |  | ||||||
|     : Block(_Block), Domains(Block), |  | ||||||
|  |  | ||||||
|       PeriodicFermOpD(_PeriodicFermOpD), |  | ||||||
|       PeriodicFermOpF(_PeriodicFermOpF), |  | ||||||
|       DirichletFermOpD(_DirichletFermOpD), |  | ||||||
|       DirichletFermOpF(_DirichletFermOpF), |  | ||||||
|       DirichletLinOpD(DirichletFermOpD), |  | ||||||
|       DirichletLinOpF(DirichletFermOpF), |  | ||||||
|       PeriodicLinOpD(PeriodicFermOpD), |  | ||||||
|       PeriodicLinOpF(PeriodicFermOpF), |  | ||||||
|       DirichletLinOpDagD(DirichletFermOpD), |  | ||||||
|       DirichletLinOpDagF(DirichletFermOpF), |  | ||||||
|       PeriodicLinOpDagD(PeriodicFermOpD), |  | ||||||
|       PeriodicLinOpDagF(PeriodicFermOpF) |  | ||||||
|   { |  | ||||||
|     tol=1.0e-10; |  | ||||||
|     tolinner=1.0e-6; |  | ||||||
|     maxinnerit=1000; |  | ||||||
|     maxouterit=10; |  | ||||||
|     assert(PeriodicFermOpD.FermionGrid() == DirichletFermOpD.FermionGrid()); |  | ||||||
|     assert(PeriodicFermOpF.FermionGrid() == DirichletFermOpF.FermionGrid()); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   enum Domain { Omega=0, OmegaBar=1 }; |  | ||||||
|  |  | ||||||
|   void ImportGauge(const GaugeField &Umu) |  | ||||||
|   { |  | ||||||
|     // Single precision will update in the mixed prec CG |  | ||||||
|     PeriodicFermOpD.ImportGauge(Umu); |  | ||||||
|     GaugeField dUmu(Umu.Grid()); |  | ||||||
|     dUmu=Umu; |  | ||||||
|     //    DirchletBCs(dUmu); |  | ||||||
|     DirichletFilter<GaugeField> Filter(Block); |  | ||||||
|     Filter.applyFilter(dUmu); |  | ||||||
|     DirichletFermOpD.ImportGauge(dUmu); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|   void ProjectBoundaryBothDomains (FermionField &f,int sgn) |  | ||||||
|   { |  | ||||||
|     assert((sgn==1)||(sgn==-1)); |  | ||||||
|     Real rsgn = sgn; |  | ||||||
|  |  | ||||||
|     Gamma::Algebra Gmu [] = { |  | ||||||
|       Gamma::Algebra::GammaX, |  | ||||||
|       Gamma::Algebra::GammaY, |  | ||||||
|       Gamma::Algebra::GammaZ, |  | ||||||
|       Gamma::Algebra::GammaT |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     GridBase *grid = f.Grid(); |  | ||||||
|     LatticeInteger  coor(grid); |  | ||||||
|     LatticeInteger  face(grid); |  | ||||||
|     LatticeInteger  one(grid); one = 1; |  | ||||||
|     LatticeInteger  zero(grid); zero = 0; |  | ||||||
|     LatticeInteger nface(grid); nface=Zero(); |  | ||||||
|      |  | ||||||
|     FermionField projected(grid); projected=Zero(); |  | ||||||
|     FermionField sp_proj  (grid); |  | ||||||
|  |  | ||||||
|     int dims = grid->Nd(); |  | ||||||
|     int isDWF= (dims==Nd+1); |  | ||||||
|     assert((dims==Nd)||(dims==Nd+1)); |  | ||||||
|     Coordinate Global=grid->GlobalDimensions(); |  | ||||||
|  |  | ||||||
|     for(int mu=0;mu<Nd;mu++){ |  | ||||||
|  |  | ||||||
|       if ( Block[mu] <= Global[mu+isDWF] ) { |  | ||||||
| 	// need to worry about DWF 5th dim first |  | ||||||
| 	LatticeCoordinate(coor,mu+isDWF);  |  | ||||||
|        |  | ||||||
| 	face = where(mod(coor,Block[mu]) == Integer(0),one,zero ); |  | ||||||
| 	nface = nface + face; |  | ||||||
|  |  | ||||||
| 	Gamma G(Gmu[mu]); |  | ||||||
| 	// Lower face receives (1-gamma)/2 in normal forward hopping term |  | ||||||
| 	sp_proj  = 0.5*(f-G*f*rsgn); |  | ||||||
| 	projected= where(face,sp_proj,projected); |  | ||||||
| 	//projected= where(face,f,projected); |  | ||||||
|        |  | ||||||
| 	face = where(mod(coor,Block[mu]) == Integer(Block[mu]-1) ,one,zero ); |  | ||||||
| 	nface = nface + face; |  | ||||||
|  |  | ||||||
| 	// Upper face receives (1+gamma)/2 in normal backward hopping term |  | ||||||
| 	sp_proj = 0.5*(f+G*f*rsgn); |  | ||||||
| 	projected= where(face,sp_proj,projected); |  | ||||||
| 	//projected= where(face,f,projected); |  | ||||||
|       } |  | ||||||
|        |  | ||||||
|     } |  | ||||||
|     // Initial Zero() where nface==0. |  | ||||||
|     // Keep the spin projected faces where nface==1 |  | ||||||
|     // Full spinor where nface>=2 |  | ||||||
|     projected = where(nface>Integer(1),f,projected); |  | ||||||
|     f=projected; |  | ||||||
|   } |  | ||||||
| */ |  | ||||||
|   void ProjectBoundaryBothDomains (FermionField &f,int sgn) |  | ||||||
|   { |  | ||||||
|     assert((sgn==1)||(sgn==-1)); |  | ||||||
|     Real rsgn = sgn; |  | ||||||
|  |  | ||||||
|     Gamma::Algebra Gmu [] = { |  | ||||||
|       Gamma::Algebra::GammaX, |  | ||||||
|       Gamma::Algebra::GammaY, |  | ||||||
|       Gamma::Algebra::GammaZ, |  | ||||||
|       Gamma::Algebra::GammaT |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     GridBase *grid = f.Grid(); |  | ||||||
|     LatticeInteger  coor(grid); |  | ||||||
|     LatticeInteger  face(grid); |  | ||||||
|     LatticeInteger  one(grid);   one = 1; |  | ||||||
|     LatticeInteger  zero(grid); zero = 0; |  | ||||||
|     LatticeInteger  omega(grid); |  | ||||||
|     LatticeInteger  omegabar(grid); |  | ||||||
|     LatticeInteger  tmp(grid); |  | ||||||
|  |  | ||||||
|     omega=one;    Domains.ProjectDomain(omega,0); |  | ||||||
|     omegabar=one; Domains.ProjectDomain(omegabar,1); |  | ||||||
|      |  | ||||||
|     LatticeInteger nface(grid); nface=Zero(); |  | ||||||
|      |  | ||||||
|     FermionField projected(grid); projected=Zero(); |  | ||||||
|     FermionField sp_proj  (grid); |  | ||||||
|  |  | ||||||
|     int dims = grid->Nd(); |  | ||||||
|     int isDWF= (dims==Nd+1); |  | ||||||
|     assert((dims==Nd)||(dims==Nd+1)); |  | ||||||
|     Coordinate Global=grid->GlobalDimensions(); |  | ||||||
|  |  | ||||||
|     for(int mmu=0;mmu<Nd;mmu++){ |  | ||||||
|       Gamma G(Gmu[mmu]); |  | ||||||
|  |  | ||||||
|       // need to worry about DWF 5th dim first |  | ||||||
|       int mu = mmu+isDWF; |  | ||||||
|       if ( Block[mmu] && (Block[mmu] <= Global[mu]) ) { |  | ||||||
|  |  | ||||||
| 	// Lower face receives (1-gamma)/2 in normal forward hopping term |  | ||||||
|  	tmp = Cshift(omegabar,mu,-1); |  | ||||||
| 	tmp = tmp + omega; |  | ||||||
| 	face = where(tmp == Integer(2),one,zero ); |  | ||||||
|  |  | ||||||
|  	tmp = Cshift(omega,mu,-1); |  | ||||||
| 	tmp = tmp + omegabar; |  | ||||||
| 	face = where(tmp == Integer(2),one,face ); |  | ||||||
|  |  | ||||||
| 	nface = nface + face; |  | ||||||
|  |  | ||||||
| 	sp_proj  = 0.5*(f-G*f*rsgn); |  | ||||||
| 	projected= where(face,sp_proj,projected); |  | ||||||
|  |  | ||||||
| 	// Upper face receives (1+gamma)/2 in normal backward hopping term |  | ||||||
|  	tmp = Cshift(omegabar,mu,1); |  | ||||||
| 	tmp = tmp + omega; |  | ||||||
| 	face = where(tmp == Integer(2),one,zero ); |  | ||||||
|  |  | ||||||
|  	tmp = Cshift(omega,mu,1); |  | ||||||
| 	tmp = tmp + omegabar; |  | ||||||
| 	face = where(tmp == Integer(2),one,face ); |  | ||||||
|  |  | ||||||
| 	nface = nface + face; |  | ||||||
|  |  | ||||||
| 	sp_proj = 0.5*(f+G*f*rsgn); |  | ||||||
| 	projected= where(face,sp_proj,projected); |  | ||||||
|       } |  | ||||||
|        |  | ||||||
|     } |  | ||||||
|     // Initial Zero() where nface==0. |  | ||||||
|     // Keep the spin projected faces where nface==1 |  | ||||||
|     // Full spinor where nface>=2 |  | ||||||
|     projected = where(nface>Integer(1),f,projected); |  | ||||||
|     f=projected; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   void ProjectDomain(FermionField &f,int domain) |  | ||||||
|   { |  | ||||||
| /* |  | ||||||
|     GridBase *grid = f.Grid(); |  | ||||||
|     int dims = grid->Nd(); |  | ||||||
|     int isDWF= (dims==Nd+1); |  | ||||||
|     assert((dims==Nd)||(dims==Nd+1)); |  | ||||||
|  |  | ||||||
|     FermionField zz(grid); zz=Zero(); |  | ||||||
|     LatticeInteger coor(grid); |  | ||||||
|     LatticeInteger domaincb(grid); domaincb=Zero(); |  | ||||||
|     for(int d=0;d<Nd;d++){ |  | ||||||
|       LatticeCoordinate(coor,d+isDWF); |  | ||||||
|       domaincb = domaincb + div(coor,Block[d]); |  | ||||||
|     } |  | ||||||
|     f = where(mod(domaincb,2)==Integer(domain),f,zz); |  | ||||||
| */ |  | ||||||
|     Domains.ProjectDomain(f,domain); |  | ||||||
|  |  | ||||||
|   }; |  | ||||||
|   void ProjectOmegaBar   (FermionField &f) {ProjectDomain(f,OmegaBar);} |  | ||||||
|   void ProjectOmega      (FermionField &f) {ProjectDomain(f,Omega);} |  | ||||||
|   // See my notes(!). |  | ||||||
|   // Notation: Following Luscher, we introduce projectors $\hPdb$ with both spinor and space structure |  | ||||||
|   // projecting all spinor elements in $\Omega$ connected by $\Ddb$ to $\bar{\Omega}$, |  | ||||||
|   void ProjectBoundaryBar(FermionField &f) |  | ||||||
|   { |  | ||||||
|     ProjectBoundaryBothDomains(f,1); |  | ||||||
|     ProjectOmega(f); |  | ||||||
|   } |  | ||||||
|   // and $\hPd$ projecting all spinor elements in $\bar{\Omega}$ connected by $\Dd$ to $\Omega$. |  | ||||||
|   void ProjectBoundary   (FermionField &f) |  | ||||||
|   { |  | ||||||
|     ProjectBoundaryBothDomains(f,1); |  | ||||||
|     ProjectOmegaBar(f); |  | ||||||
|     //    DumpSliceNorm("ProjectBoundary",f,f.Grid()->Nd()-1); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   void dBoundary    (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmegaBar(tmp); |  | ||||||
|     PeriodicFermOpD.M(tmp,out); |  | ||||||
|     ProjectOmega(out); |  | ||||||
|   }; |  | ||||||
|   void dBoundaryDag (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmega(tmp); |  | ||||||
|     PeriodicFermOpD.Mdag(tmp,out); |  | ||||||
|     ProjectOmegaBar(out); |  | ||||||
|   }; |  | ||||||
|   void dBoundaryBar (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmega(tmp); |  | ||||||
|     PeriodicFermOpD.M(tmp,out); |  | ||||||
|     ProjectOmegaBar(out); |  | ||||||
|   }; |  | ||||||
|   void dBoundaryBarDag (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmegaBar(tmp); |  | ||||||
|     PeriodicFermOpD.Mdag(tmp,out); |  | ||||||
|     ProjectOmega(out); |  | ||||||
|   }; |  | ||||||
|   void dOmega       (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmega(tmp); |  | ||||||
|     DirichletFermOpD.M(tmp,out); |  | ||||||
|     ProjectOmega(out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaBar    (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmegaBar(tmp); |  | ||||||
|     DirichletFermOpD.M(tmp,out); |  | ||||||
|     ProjectOmegaBar(out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaDag       (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmega(tmp); |  | ||||||
|     DirichletFermOpD.Mdag(tmp,out); |  | ||||||
|     ProjectOmega(out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaBarDag    (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmegaBar(tmp); |  | ||||||
|     DirichletFermOpD.Mdag(tmp,out); |  | ||||||
|     ProjectOmegaBar(out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaInv   (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmega(tmp); |  | ||||||
|     dOmegaInvAndOmegaBarInv(tmp,out); // Inefficient warning |  | ||||||
|     ProjectOmega(out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaBarInv(FermionField &in,FermionField &out) |  | ||||||
|   {     |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmegaBar(tmp); |  | ||||||
|     dOmegaInvAndOmegaBarInv(tmp,out); |  | ||||||
|     ProjectOmegaBar(out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaDagInv   (FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmega(tmp); |  | ||||||
|     dOmegaDagInvAndOmegaBarDagInv(tmp,out); |  | ||||||
|     ProjectOmega(out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaBarDagInv(FermionField &in,FermionField &out) |  | ||||||
|   {     |  | ||||||
|     FermionField tmp(in); |  | ||||||
|     ProjectOmegaBar(tmp); |  | ||||||
|     dOmegaDagInvAndOmegaBarDagInv(tmp,out); |  | ||||||
|     ProjectOmegaBar(out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaInvAndOmegaBarInv(FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     MxPCG OmegaSolver(tol, |  | ||||||
| 		      tolinner, |  | ||||||
| 		      maxinnerit, |  | ||||||
| 		      maxouterit, |  | ||||||
| 		      DirichletFermOpF.FermionRedBlackGrid(), |  | ||||||
| 		      DirichletFermOpF, |  | ||||||
| 		      DirichletFermOpD, |  | ||||||
| 		      DirichletLinOpF, |  | ||||||
| 		      DirichletLinOpD); |  | ||||||
|     SchurRedBlackDiagMooeeSolve<FermionField> PrecSolve(OmegaSolver); |  | ||||||
|     PrecSolve(DirichletFermOpD,in,out); |  | ||||||
|   }; |  | ||||||
|   void dOmegaDagInvAndOmegaBarDagInv(FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     MxDagPCG OmegaDagSolver(tol, |  | ||||||
| 			    tolinner, |  | ||||||
| 			    maxinnerit, |  | ||||||
| 			    maxouterit, |  | ||||||
| 			    DirichletFermOpF.FermionRedBlackGrid(), |  | ||||||
| 			    DirichletFermOpF, |  | ||||||
| 			    DirichletFermOpD, |  | ||||||
| 			    DirichletLinOpDagF, |  | ||||||
| 			    DirichletLinOpDagD); |  | ||||||
|     SchurRedBlackDiagMooeeDagSolve<FermionField> PrecSolve(OmegaDagSolver); |  | ||||||
|     PrecSolve(DirichletFermOpD,in,out); |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   // Rdag = Pdbar - DdbarDag DomegabarDagInv  DdDag DomegaDagInv Pdbar  |  | ||||||
|   void RDag(FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp1(PeriodicFermOpD.FermionGrid()); |  | ||||||
|     FermionField tmp2(PeriodicFermOpD.FermionGrid()); |  | ||||||
|     out = in; |  | ||||||
|     ProjectBoundaryBar(out); |  | ||||||
|     dOmegaDagInv(out,tmp1);    |  | ||||||
|     dBoundaryDag(tmp1,tmp2);    |  | ||||||
|     dOmegaBarDagInv(tmp2,tmp1); |  | ||||||
|     dBoundaryBarDag(tmp1,tmp2);  |  | ||||||
|     out = out - tmp2; |  | ||||||
|   }; |  | ||||||
|  |  | ||||||
|   // R = Pdbar - Pdbar DomegaInv Dd DomegabarInv Ddbar |  | ||||||
|   void R(FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp1(PeriodicFermOpD.FermionGrid()); |  | ||||||
|     FermionField tmp2(PeriodicFermOpD.FermionGrid()); |  | ||||||
|     out = in; |  | ||||||
|     ProjectBoundaryBar(out); |  | ||||||
|     dBoundaryBar(out,tmp1);  |  | ||||||
|     dOmegaBarInv(tmp1,tmp2); |  | ||||||
|     dBoundary(tmp2,tmp1);    |  | ||||||
|     dOmegaInv(tmp1,tmp2);    |  | ||||||
|     out = in - tmp2 ;        |  | ||||||
|     ProjectBoundaryBar(out); |  | ||||||
|     //    DumpSliceNorm("R",out,out.Grid()->Nd()-1); |  | ||||||
|   }; |  | ||||||
|    |  | ||||||
|   // R = Pdbar - Pdbar Dinv Ddbar  |  | ||||||
|   void RInv(FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp1(PeriodicFermOpD.FermionGrid()); |  | ||||||
|     dBoundaryBar(in,out); |  | ||||||
|     Dinverse(out,tmp1);   |  | ||||||
|     out =in -tmp1;  |  | ||||||
|     ProjectBoundaryBar(out); |  | ||||||
|   }; |  | ||||||
|   // R = Pdbar - DdbarDag DinvDag Pdbar  |  | ||||||
|   void RDagInv(FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     FermionField tmp(PeriodicFermOpD.FermionGrid()); |  | ||||||
|     FermionField Pin(PeriodicFermOpD.FermionGrid()); |  | ||||||
|     Pin = in; ProjectBoundaryBar(Pin); |  | ||||||
|     DinverseDag(Pin,out);   |  | ||||||
|     dBoundaryBarDag(out,tmp); |  | ||||||
|     out =Pin -tmp;  |  | ||||||
|   }; |  | ||||||
|   // Non-dirichlet inverter using red-black preconditioning |  | ||||||
|   void Dinverse(FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     MxPCG DSolver(tol, |  | ||||||
| 		  tolinner, |  | ||||||
| 		  maxinnerit, |  | ||||||
| 		  maxouterit, |  | ||||||
| 		  PeriodicFermOpF.FermionRedBlackGrid(), |  | ||||||
| 		  PeriodicFermOpF, |  | ||||||
| 		  PeriodicFermOpD, |  | ||||||
| 		  PeriodicLinOpF, |  | ||||||
| 		  PeriodicLinOpD); |  | ||||||
|     SchurRedBlackDiagMooeeSolve<FermionField> Solve(DSolver); |  | ||||||
|     Solve(PeriodicFermOpD,in,out); |  | ||||||
|   } |  | ||||||
|   void DinverseDag(FermionField &in,FermionField &out) |  | ||||||
|   { |  | ||||||
|     MxDagPCG DdagSolver(tol, |  | ||||||
| 			tolinner, |  | ||||||
| 			maxinnerit, |  | ||||||
| 			maxouterit, |  | ||||||
| 			PeriodicFermOpF.FermionRedBlackGrid(), |  | ||||||
| 			PeriodicFermOpF, |  | ||||||
| 			PeriodicFermOpD, |  | ||||||
| 			PeriodicLinOpDagF, |  | ||||||
| 			PeriodicLinOpDagD); |  | ||||||
|     SchurRedBlackDiagMooeeDagSolve<FermionField> Solve(DdagSolver); |  | ||||||
|     Solve(PeriodicFermOpD,in,out); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); |  | ||||||
|  |  | ||||||
| @@ -68,12 +68,11 @@ public: | |||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Compress includes precision change if mpi data is not same */ |   /* Compress includes precision change if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { |   template<class _SiteHalfSpinor, class _SiteSpinor> | ||||||
|     typedef decltype(coalescedRead(buf)) sobj; |   accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { | ||||||
|     sobj sp; |     _SiteHalfSpinor tmp; | ||||||
|     auto sin = coalescedRead(in); |     projector::Proj(tmp,in,mu,dag); | ||||||
|     projector::Proj(sp,sin,mu,dag); |     vstream(buf[o],tmp); | ||||||
|     coalescedWrite(buf,sp); |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -83,18 +82,13 @@ public: | |||||||
| 				   const SiteHalfSpinor * __restrict__ vp0, | 				   const SiteHalfSpinor * __restrict__ vp0, | ||||||
| 				   const SiteHalfSpinor * __restrict__ vp1, | 				   const SiteHalfSpinor * __restrict__ vp1, | ||||||
| 				   Integer type,Integer o) const { | 				   Integer type,Integer o) const { | ||||||
| #ifdef GRID_SIMT |  | ||||||
|     exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); |  | ||||||
| #else |  | ||||||
|     SiteHalfSpinor tmp1; |     SiteHalfSpinor tmp1; | ||||||
|     SiteHalfSpinor tmp2; |     SiteHalfSpinor tmp2; | ||||||
|     exchange(tmp1,tmp2,vp0[o],vp1[o],type); |     exchange(tmp1,tmp2,vp0[o],vp1[o],type); | ||||||
|     vstream(mp[2*o  ],tmp1); |     vstream(mp[2*o  ],tmp1); | ||||||
|     vstream(mp[2*o+1],tmp2); |     vstream(mp[2*o+1],tmp2); | ||||||
| #endif |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Have a decompression step if mpi data is not same */ |   /* Have a decompression step if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -111,28 +105,6 @@ public: | |||||||
| 					   const SiteSpinor * __restrict__ in, | 					   const SiteSpinor * __restrict__ in, | ||||||
| 					   Integer j,Integer k, Integer m,Integer type) const | 					   Integer j,Integer k, Integer m,Integer type) const | ||||||
|   { |   { | ||||||
| #ifdef GRID_SIMT |  | ||||||
|     typedef SiteSpinor vobj; |  | ||||||
|     typedef SiteHalfSpinor hvobj; |  | ||||||
|     typedef decltype(coalescedRead(*in))    sobj; |  | ||||||
|     typedef decltype(coalescedRead(*out0)) hsobj; |  | ||||||
|  |  | ||||||
|     unsigned int Nsimd = vobj::Nsimd(); |  | ||||||
|     unsigned int mask = Nsimd >> (type + 1); |  | ||||||
|     int lane = acceleratorSIMTlane(Nsimd); |  | ||||||
|     int j0 = lane &(~mask); // inner coor zero |  | ||||||
|     int j1 = lane |(mask) ; // inner coor one |  | ||||||
|     const vobj *vp0 = &in[k]; |  | ||||||
|     const vobj *vp1 = &in[m]; |  | ||||||
|     const vobj *vp = (lane&mask) ? vp1:vp0; |  | ||||||
|     auto sa = coalescedRead(*vp,j0); |  | ||||||
|     auto sb = coalescedRead(*vp,j1); |  | ||||||
|     hsobj psa, psb; |  | ||||||
|     projector::Proj(psa,sa,mu,dag); |  | ||||||
|     projector::Proj(psb,sb,mu,dag); |  | ||||||
|     coalescedWrite(out0[j],psa); |  | ||||||
|     coalescedWrite(out1[j],psb); |  | ||||||
| #else |  | ||||||
|     SiteHalfSpinor temp1, temp2; |     SiteHalfSpinor temp1, temp2; | ||||||
|     SiteHalfSpinor temp3, temp4; |     SiteHalfSpinor temp3, temp4; | ||||||
|     projector::Proj(temp1,in[k],mu,dag); |     projector::Proj(temp1,in[k],mu,dag); | ||||||
| @@ -140,7 +112,6 @@ public: | |||||||
|     exchange(temp3,temp4,temp1,temp2,type); |     exchange(temp3,temp4,temp1,temp2,type); | ||||||
|     vstream(out0[j],temp3); |     vstream(out0[j],temp3); | ||||||
|     vstream(out1[j],temp4); |     vstream(out1[j],temp4); | ||||||
| #endif |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
| @@ -150,7 +121,6 @@ public: | |||||||
|  |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #if 0 |  | ||||||
| template<class _HCspinor,class _Hspinor,class _Spinor, class projector> | template<class _HCspinor,class _Hspinor,class _Spinor, class projector> | ||||||
| class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, | class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, | ||||||
| 				typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > | 				typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > | ||||||
| @@ -179,23 +149,13 @@ public: | |||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Compress includes precision change if mpi data is not same */ |   /* Compress includes precision change if mpi data is not same */ | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { |   template<class _SiteHalfSpinor, class _SiteSpinor> | ||||||
|     SiteHalfSpinor hsp; |   accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { | ||||||
|  |     _SiteHalfSpinor hsp; | ||||||
|     SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; |     SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; | ||||||
|     projector::Proj(hsp,in,mu,dag); |     projector::Proj(hsp,in,mu,dag); | ||||||
|     precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); |     precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); | ||||||
|   } |   } | ||||||
|   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { |  | ||||||
| #ifdef GRID_SIMT |  | ||||||
|     typedef decltype(coalescedRead(buf)) sobj; |  | ||||||
|     sobj sp; |  | ||||||
|     auto sin = coalescedRead(in); |  | ||||||
|     projector::Proj(sp,sin,mu,dag); |  | ||||||
|     coalescedWrite(buf,sp); |  | ||||||
| #else |  | ||||||
|     projector::Proj(buf,in,mu,dag); |  | ||||||
| #endif |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /*****************************************************/ |   /*****************************************************/ | ||||||
|   /* Exchange includes precision change if mpi data is not same */ |   /* Exchange includes precision change if mpi data is not same */ | ||||||
| @@ -243,7 +203,6 @@ public: | |||||||
|   accelerator_inline bool DecompressionStep(void) const { return true; } |   accelerator_inline bool DecompressionStep(void) const { return true; } | ||||||
|  |  | ||||||
| }; | }; | ||||||
| #endif |  | ||||||
|  |  | ||||||
| #define DECLARE_PROJ(Projector,Compressor,spProj)			\ | #define DECLARE_PROJ(Projector,Compressor,spProj)			\ | ||||||
|   class Projector {							\ |   class Projector {							\ | ||||||
| @@ -294,8 +253,33 @@ public: | |||||||
|   typedef typename Base::View_type View_type; |   typedef typename Base::View_type View_type; | ||||||
|   typedef typename Base::StencilVector StencilVector; |   typedef typename Base::StencilVector StencilVector; | ||||||
|  |  | ||||||
|   void ZeroCountersi(void)  {  } |   double timer0; | ||||||
|   void Reporti(int calls)  {  } |   double timer1; | ||||||
|  |   double timer2; | ||||||
|  |   double timer3; | ||||||
|  |   double timer4; | ||||||
|  |   double timer5; | ||||||
|  |   double timer6; | ||||||
|  |   uint64_t callsi; | ||||||
|  |   void ZeroCountersi(void) | ||||||
|  |   { | ||||||
|  |     timer0=0; | ||||||
|  |     timer1=0; | ||||||
|  |     timer2=0; | ||||||
|  |     timer3=0; | ||||||
|  |     timer4=0; | ||||||
|  |     timer5=0; | ||||||
|  |     timer6=0; | ||||||
|  |     callsi=0; | ||||||
|  |   } | ||||||
|  |   void Reporti(int calls) | ||||||
|  |   { | ||||||
|  |     if ( timer0 ) std::cout << GridLogMessage << " timer0 (HaloGatherOpt) " <<timer0/calls <<std::endl; | ||||||
|  |     if ( timer1 ) std::cout << GridLogMessage << " timer1 (Communicate)   " <<timer1/calls <<std::endl; | ||||||
|  |     if ( timer2 ) std::cout << GridLogMessage << " timer2 (CommsMerge )   " <<timer2/calls <<std::endl; | ||||||
|  |     if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " <<timer3/calls <<std::endl; | ||||||
|  |     if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|   std::vector<int> surface_list; |   std::vector<int> surface_list; | ||||||
|  |  | ||||||
| @@ -303,10 +287,8 @@ public: | |||||||
| 		int npoints, | 		int npoints, | ||||||
| 		int checkerboard, | 		int checkerboard, | ||||||
| 		const std::vector<int> &directions, | 		const std::vector<int> &directions, | ||||||
| 		const std::vector<int> &distances, | 		const std::vector<int> &distances,Parameters p)   | ||||||
| 		bool locally_periodic, |     : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)  | ||||||
| 		Parameters p)   |  | ||||||
|     : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,locally_periodic,p) |  | ||||||
|   {  |   {  | ||||||
|     ZeroCountersi(); |     ZeroCountersi(); | ||||||
|     surface_list.resize(0); |     surface_list.resize(0); | ||||||
| @@ -339,18 +321,26 @@ public: | |||||||
|   { |   { | ||||||
|     std::vector<std::vector<CommsRequest_t> > reqs; |     std::vector<std::vector<CommsRequest_t> > reqs; | ||||||
|     this->HaloExchangeOptGather(source,compress); |     this->HaloExchangeOptGather(source,compress); | ||||||
|  |     double t1=usecond(); | ||||||
|     // Asynchronous MPI calls multidirectional, Isend etc... |     // Asynchronous MPI calls multidirectional, Isend etc... | ||||||
|     // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. |     // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. | ||||||
|     this->Communicate(); |     this->Communicate(); | ||||||
|  |     double t2=usecond(); timer1 += t2-t1; | ||||||
|     this->CommsMerge(compress); |     this->CommsMerge(compress); | ||||||
|  |     double t3=usecond(); timer2 += t3-t2; | ||||||
|     this->CommsMergeSHM(compress); |     this->CommsMergeSHM(compress); | ||||||
|  |     double t4=usecond(); timer3 += t4-t3; | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   template <class compressor> |   template <class compressor> | ||||||
|   void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)  |   void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)  | ||||||
|   { |   { | ||||||
|     this->Prepare(); |     this->Prepare(); | ||||||
|  |     double t0=usecond(); | ||||||
|     this->HaloGatherOpt(source,compress); |     this->HaloGatherOpt(source,compress); | ||||||
|  |     double t1=usecond(); | ||||||
|  |     timer0 += t1-t0; | ||||||
|  |     callsi++; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   template <class compressor> |   template <class compressor> | ||||||
| @@ -362,9 +352,12 @@ public: | |||||||
|     typedef typename compressor::SiteHalfSpinor     SiteHalfSpinor; |     typedef typename compressor::SiteHalfSpinor     SiteHalfSpinor; | ||||||
|     typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; |     typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; | ||||||
|  |  | ||||||
|  |     this->mpi3synctime_g-=usecond(); | ||||||
|     this->_grid->StencilBarrier(); |     this->_grid->StencilBarrier(); | ||||||
|  |     this->mpi3synctime_g+=usecond(); | ||||||
|  |  | ||||||
|     assert(source.Grid()==this->_grid); |     assert(source.Grid()==this->_grid); | ||||||
|  |     this->halogtime-=usecond(); | ||||||
|      |      | ||||||
|     this->u_comm_offset=0; |     this->u_comm_offset=0; | ||||||
|        |        | ||||||
| @@ -400,6 +393,7 @@ public: | |||||||
|     } |     } | ||||||
|     this->face_table_computed=1; |     this->face_table_computed=1; | ||||||
|     assert(this->u_comm_offset==this->_unified_buffer_size); |     assert(this->u_comm_offset==this->_unified_buffer_size); | ||||||
|  |     this->halogtime+=usecond(); | ||||||
|     accelerator_barrier(); |     accelerator_barrier(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -146,9 +146,6 @@ public: | |||||||
|   void DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, |   void DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||||
|                     const FermionField &in, FermionField &out, int dag); |                     const FermionField &in, FermionField &out, int dag); | ||||||
|  |  | ||||||
|   void DhopInternalDirichletComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, |  | ||||||
| 				  const FermionField &in, FermionField &out, int dag); |  | ||||||
|    |  | ||||||
|   void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, |   void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||||
|                     const FermionField &in, FermionField &out, int dag); |                     const FermionField &in, FermionField &out, int dag); | ||||||
|  |  | ||||||
| @@ -160,9 +157,6 @@ public: | |||||||
|  |  | ||||||
|   // DoubleStore impl dependent |   // DoubleStore impl dependent | ||||||
|   void ImportGauge(const GaugeField &_Umu); |   void ImportGauge(const GaugeField &_Umu); | ||||||
|   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; |  | ||||||
|   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; |  | ||||||
|   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; |  | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   // Data members require to support the functionality |   // Data members require to support the functionality | ||||||
|   | |||||||
| @@ -166,13 +166,6 @@ public: | |||||||
| 			       FermionField &out, | 			       FermionField &out, | ||||||
| 			       int dag); | 			       int dag); | ||||||
|      |      | ||||||
|   void DhopInternalDirichletComms(StencilImpl & st, |  | ||||||
| 				  LebesgueOrder &lo, |  | ||||||
| 				  DoubledGaugeField &U, |  | ||||||
| 				  const FermionField &in,  |  | ||||||
| 				  FermionField &out, |  | ||||||
| 				  int dag); |  | ||||||
|    |  | ||||||
|   // Constructors |   // Constructors | ||||||
|   WilsonFermion5D(GaugeField &_Umu, |   WilsonFermion5D(GaugeField &_Umu, | ||||||
| 		  GridCartesian         &FiveDimGrid, | 		  GridCartesian         &FiveDimGrid, | ||||||
| @@ -181,11 +174,19 @@ public: | |||||||
| 		  GridRedBlackCartesian &FourDimRedBlackGrid, | 		  GridRedBlackCartesian &FourDimRedBlackGrid, | ||||||
| 		  double _M5,const ImplParams &p= ImplParams()); | 		  double _M5,const ImplParams &p= ImplParams()); | ||||||
|      |      | ||||||
|  |   // Constructors | ||||||
|  |   /* | ||||||
|  |     WilsonFermion5D(int simd,  | ||||||
|  |     GaugeField &_Umu, | ||||||
|  |     GridCartesian         &FiveDimGrid, | ||||||
|  |     GridRedBlackCartesian &FiveDimRedBlackGrid, | ||||||
|  |     GridCartesian         &FourDimGrid, | ||||||
|  |     double _M5,const ImplParams &p= ImplParams()); | ||||||
|  |   */ | ||||||
|  |      | ||||||
|   // DoubleStore |   // DoubleStore | ||||||
|   void ImportGauge(const GaugeField &_Umu); |   void ImportGauge(const GaugeField &_Umu); | ||||||
|   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; |      | ||||||
|   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; |  | ||||||
|   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; |  | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   // Data members require to support the functionality |   // Data members require to support the functionality | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -243,17 +243,17 @@ typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffReal > WilsonImplR | |||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF;  // Float | typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF;  // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD;  // Double | typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD;  // Double | ||||||
|  |  | ||||||
| //typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL;  // Real.. whichever prec | typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL;  // Real.. whichever prec | ||||||
| //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH;  // Float | typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH;  // Float | ||||||
| //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF;  // Double | typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF;  // Double | ||||||
|  |  | ||||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec | typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float | typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float | ||||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double | typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double | ||||||
|  |  | ||||||
| //typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec | typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec | ||||||
| //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float | typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float | ||||||
| //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double | typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double | ||||||
|   |   | ||||||
| typedef WilsonImpl<vComplex,  AdjointRepresentation, CoeffReal > WilsonAdjImplR;   // Real.. whichever prec | typedef WilsonImpl<vComplex,  AdjointRepresentation, CoeffReal > WilsonAdjImplR;   // Real.. whichever prec | ||||||
| typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF;  // Float | typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF;  // Float | ||||||
|   | |||||||
| @@ -39,7 +39,7 @@ NAMESPACE_BEGIN(Grid); | |||||||
| class WilsonKernelsStatic {  | class WilsonKernelsStatic {  | ||||||
| public: | public: | ||||||
|   enum { OptGeneric, OptHandUnroll, OptInlineAsm }; |   enum { OptGeneric, OptHandUnroll, OptInlineAsm }; | ||||||
|   enum { CommsAndCompute, CommsThenCompute, CommsDirichlet }; |   enum { CommsAndCompute, CommsThenCompute }; | ||||||
|   static int Opt;   |   static int Opt;   | ||||||
|   static int Comms; |   static int Comms; | ||||||
| }; | }; | ||||||
|   | |||||||
| @@ -112,6 +112,7 @@ void CayleyFermion5D<Impl>::ImportUnphysicalFermion(const FermionField &input4d, | |||||||
|   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); |   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); | ||||||
|   imported5d=tmp; |   imported5d=tmp; | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class Impl>   | template<class Impl>   | ||||||
| void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) | void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) | ||||||
| { | { | ||||||
| @@ -126,37 +127,6 @@ void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &inpu | |||||||
|   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); |   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); | ||||||
|   Dminus(tmp,imported5d); |   Dminus(tmp,imported5d); | ||||||
| } | } | ||||||
| //////////////////////////////////////////////////// |  | ||||||
| // Added for fourD pseudofermion det estimation |  | ||||||
| //////////////////////////////////////////////////// |  | ||||||
| template<class Impl>   |  | ||||||
| void CayleyFermion5D<Impl>::ImportFourDimPseudoFermion(const FermionField &input4d,FermionField &imported5d) |  | ||||||
| { |  | ||||||
|   int Ls = this->Ls; |  | ||||||
|   FermionField tmp(this->FermionGrid()); |  | ||||||
|   conformable(imported5d.Grid(),this->FermionGrid()); |  | ||||||
|   conformable(input4d.Grid()   ,this->GaugeGrid()); |  | ||||||
|   tmp = Zero(); |  | ||||||
|   InsertSlice(input4d, tmp, 0   , 0); |  | ||||||
|   InsertSlice(input4d, tmp, Ls-1, 0); |  | ||||||
|   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, 0, 0); |  | ||||||
|   axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); |  | ||||||
|   imported5d=tmp; |  | ||||||
| } |  | ||||||
| template<class Impl>   |  | ||||||
| void CayleyFermion5D<Impl>::ExportFourDimPseudoFermion(const FermionField &solution5d,FermionField &exported4d) |  | ||||||
| { |  | ||||||
|   int Ls = this->Ls; |  | ||||||
|   FermionField tmp(this->FermionGrid()); |  | ||||||
|   tmp = solution5d; |  | ||||||
|   conformable(solution5d.Grid(),this->FermionGrid()); |  | ||||||
|   conformable(exported4d.Grid(),this->GaugeGrid()); |  | ||||||
|   axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0); |  | ||||||
|   axpby_ssp_pplus (tmp, 1., tmp       , 1., solution5d, 0, Ls-1); |  | ||||||
|   ExtractSlice(exported4d, tmp, 0, 0); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Dminus |  | ||||||
| template<class Impl>   | template<class Impl>   | ||||||
| void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi) | void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi) | ||||||
| { | { | ||||||
| @@ -910,7 +880,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   std::vector<RealD> G_s(Ls,1.0); |   std::vector<RealD> G_s(Ls,1.0); | ||||||
|   RealD sign = 1; // sign flip for vector/tadpole |   Integer sign = 1; // sign flip for vector/tadpole | ||||||
|   if ( curr_type == Current::Axial ) { |   if ( curr_type == Current::Axial ) { | ||||||
|     for(int s=0;s<Ls/2;s++){ |     for(int s=0;s<Ls/2;s++){ | ||||||
|       G_s[s] = -1.0; |       G_s[s] = -1.0; | ||||||
| @@ -931,8 +901,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, | |||||||
|   for(int s=0;s<Ls;s++){ |   for(int s=0;s<Ls;s++){ | ||||||
|  |  | ||||||
|     int sp = (s+1)%Ls; |     int sp = (s+1)%Ls; | ||||||
|     //    int sr = Ls-1-s; |     int sr = Ls-1-s; | ||||||
|     //    int srp= (sr+1)%Ls; |     int srp= (sr+1)%Ls; | ||||||
|  |  | ||||||
|     // Mobius parameters |     // Mobius parameters | ||||||
|     auto b=this->bs[s]; |     auto b=this->bs[s]; | ||||||
|   | |||||||
| @@ -51,9 +51,9 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu, | |||||||
|   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), |   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), | ||||||
|   _FourDimGrid        (&FourDimGrid), |   _FourDimGrid        (&FourDimGrid), | ||||||
|   _FourDimRedBlackGrid(&FourDimRedBlackGrid), |   _FourDimRedBlackGrid(&FourDimRedBlackGrid), | ||||||
|   Stencil    (_FiveDimGrid,npoint,Even,directions,displacements,p.locally_periodic,p), |   Stencil    (_FiveDimGrid,npoint,Even,directions,displacements,p), | ||||||
|   StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p.locally_periodic,p), // source is Even |   StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even | ||||||
|   StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p.locally_periodic,p), // source is Odd |   StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd | ||||||
|   M5(_M5), |   M5(_M5), | ||||||
|   Umu(_FourDimGrid), |   Umu(_FourDimGrid), | ||||||
|   UmuEven(_FourDimRedBlackGrid), |   UmuEven(_FourDimRedBlackGrid), | ||||||
| @@ -361,21 +361,10 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, | |||||||
|                                          const FermionField &in, FermionField &out,int dag) |                                          const FermionField &in, FermionField &out,int dag) | ||||||
| { | { | ||||||
|   DhopTotalTime-=usecond(); |   DhopTotalTime-=usecond(); | ||||||
|  |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) | ||||||
|   assert(  (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute) |  | ||||||
| 	 ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) |  | ||||||
|          ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet) ); |  | ||||||
|  |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) { |  | ||||||
|     DhopInternalOverlappedComms(st,lo,U,in,out,dag); |     DhopInternalOverlappedComms(st,lo,U,in,out,dag); | ||||||
|   } |   else  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute ) { |  | ||||||
|     DhopInternalSerialComms(st,lo,U,in,out,dag); |     DhopInternalSerialComms(st,lo,U,in,out,dag); | ||||||
|   } |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet ) { |  | ||||||
|     DhopInternalDirichletComms(st,lo,U,in,out,dag); |  | ||||||
|   } |  | ||||||
|    |  | ||||||
|   DhopTotalTime+=usecond(); |   DhopTotalTime+=usecond(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -442,30 +431,6 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | |||||||
|   DhopComputeTime2+=usecond(); |   DhopComputeTime2+=usecond(); | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class Impl> |  | ||||||
| void WilsonFermion5D<Impl>::DhopInternalDirichletComms(StencilImpl & st, LebesgueOrder &lo, |  | ||||||
| 						       DoubledGaugeField & U, |  | ||||||
| 						       const FermionField &in, FermionField &out,int dag) |  | ||||||
| { |  | ||||||
|   Compressor compressor(dag); |  | ||||||
|  |  | ||||||
|   int LLs = in.Grid()->_rdimensions[0]; |  | ||||||
|   int len =  U.Grid()->oSites(); |  | ||||||
|        |  | ||||||
|   ///////////////////////////// |  | ||||||
|   // do the compute interior |  | ||||||
|   ///////////////////////////// |  | ||||||
|   int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know |  | ||||||
|   DhopComputeTime-=usecond(); |  | ||||||
|   if (dag == DaggerYes) { |  | ||||||
|     Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); |  | ||||||
|   } else { |  | ||||||
|     Kernels::DhopKernel   (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); |  | ||||||
|   } |  | ||||||
|   accelerator_barrier(); |  | ||||||
|   DhopComputeTime+=usecond(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, | void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, | ||||||
|   | |||||||
| @@ -47,9 +47,9 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, | |||||||
|     Kernels(p), |     Kernels(p), | ||||||
|     _grid(&Fgrid), |     _grid(&Fgrid), | ||||||
|     _cbgrid(&Hgrid), |     _cbgrid(&Hgrid), | ||||||
|     Stencil(&Fgrid, npoint, Even, directions, displacements,p.locally_periodic,p), |     Stencil(&Fgrid, npoint, Even, directions, displacements,p), | ||||||
|     StencilEven(&Hgrid, npoint, Even, directions,displacements,p.locally_periodic,p),  // source is Even |     StencilEven(&Hgrid, npoint, Even, directions,displacements,p),  // source is Even | ||||||
|     StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p.locally_periodic,p),  // source is Odd |     StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p),  // source is Odd | ||||||
|     mass(_mass), |     mass(_mass), | ||||||
|     Lebesgue(_grid), |     Lebesgue(_grid), | ||||||
|     LebesgueEvenOdd(_cbgrid), |     LebesgueEvenOdd(_cbgrid), | ||||||
| @@ -488,21 +488,12 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, | |||||||
|                                        FermionField &out, int dag) |                                        FermionField &out, int dag) | ||||||
| { | { | ||||||
|   DhopTotalTime-=usecond(); |   DhopTotalTime-=usecond(); | ||||||
|  | #ifdef GRID_OMP | ||||||
|   assert(  (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute) |   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) | ||||||
| 	 ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) |  | ||||||
|          ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet) ); |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) { |  | ||||||
|     DhopInternalOverlappedComms(st,lo,U,in,out,dag); |     DhopInternalOverlappedComms(st,lo,U,in,out,dag); | ||||||
|   } |   else | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute ) { | #endif | ||||||
|     DhopInternalSerial(st,lo,U,in,out,dag); |     DhopInternalSerial(st,lo,U,in,out,dag); | ||||||
|   } |  | ||||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet ) { |  | ||||||
|     DhopInternalDirichletComms(st,lo,U,in,out,dag); |  | ||||||
|   } |  | ||||||
|   DhopTotalTime+=usecond(); |   DhopTotalTime+=usecond(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -571,29 +562,6 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO | |||||||
|   DhopComputeTime2+=usecond(); |   DhopComputeTime2+=usecond(); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <class Impl> |  | ||||||
| void WilsonFermion<Impl>::DhopInternalDirichletComms(StencilImpl &st, LebesgueOrder &lo, |  | ||||||
| 						     DoubledGaugeField &U, |  | ||||||
| 						     const FermionField &in, |  | ||||||
| 						     FermionField &out, int dag) |  | ||||||
| { |  | ||||||
|   assert((dag == DaggerNo) || (dag == DaggerYes)); |  | ||||||
|  |  | ||||||
|   Compressor compressor(dag); |  | ||||||
|   int len =  U.Grid()->oSites(); |  | ||||||
|  |  | ||||||
|   ///////////////////////////// |  | ||||||
|   // do the compute interior |  | ||||||
|   ///////////////////////////// |  | ||||||
|   int Opt = WilsonKernelsStatic::Opt; |  | ||||||
|   DhopComputeTime-=usecond(); |  | ||||||
|   if (dag == DaggerYes) { |  | ||||||
|     Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); |  | ||||||
|   } else { |  | ||||||
|     Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); |  | ||||||
|   } |  | ||||||
|   DhopComputeTime+=usecond(); |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| template <class Impl> | template <class Impl> | ||||||
| void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, | void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, | ||||||
|   | |||||||
| @@ -73,17 +73,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -102,17 +102,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -131,17 +131,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -165,17 +165,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -194,17 +194,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -223,17 +223,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| //template<> void | template<> void | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -280,17 +280,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -309,17 +309,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -338,17 +338,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| @@ -371,17 +371,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -400,17 +400,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -429,17 +429,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||||
| // template<> void | template<> void | ||||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -74,15 +74,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| // |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -97,15 +97,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| // |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| @@ -121,15 +121,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| // |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // XYZT vectorised, dag Kernel, single | // XYZT vectorised, dag Kernel, single | ||||||
| @@ -148,15 +148,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| // |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -171,15 +171,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| // |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -194,15 +194,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| // | 				     | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| #undef MAYBEPERM | #undef MAYBEPERM | ||||||
| #undef MULT_2SPIN | #undef MULT_2SPIN | ||||||
| @@ -228,14 +228,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeF | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -249,14 +249,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -273,15 +273,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| // | 				     | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // Ls vectorised, dag Kernel, single | // Ls vectorised, dag Kernel, single | ||||||
| @@ -299,14 +299,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGau | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -320,14 +320,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -341,14 +341,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #endif  // VEC 5D | #endif  // VEC 5D | ||||||
|  |  | ||||||
| @@ -392,14 +392,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -413,14 +413,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -434,14 +434,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|        |        | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // XYZT vectorised, dag Kernel, single | // XYZT vectorised, dag Kernel, single | ||||||
| @@ -459,14 +459,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -480,14 +480,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -501,14 +501,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | |||||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| #undef MAYBEPERM | #undef MAYBEPERM | ||||||
| #undef MULT_2SPIN | #undef MULT_2SPIN | ||||||
| @@ -533,14 +533,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeF | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -554,14 +554,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -577,14 +577,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGau | |||||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| 				     | 				     | ||||||
| ///////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////// | ||||||
| // Ls vectorised, dag Kernel, single | // Ls vectorised, dag Kernel, single | ||||||
| @@ -602,14 +602,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGau | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #define INTERIOR | #define INTERIOR | ||||||
| @@ -623,14 +623,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #undef INTERIOR_AND_EXTERIOR | #undef INTERIOR_AND_EXTERIOR | ||||||
| #undef INTERIOR | #undef INTERIOR | ||||||
| @@ -645,14 +645,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, Doubled | |||||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
| //template<> void | template<> void  | ||||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||||
|  |  | ||||||
| #endif  // VEC 5D | #endif  // VEC 5D | ||||||
|  |  | ||||||
|   | |||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../CayleyFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../ContinuedFractionFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../DomainWallEOFAFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../MobiusEOFAFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../PartialFractionFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonCloverFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonKernelsInstantiationGparity.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonTMFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | #define IMPLEMENTATION GparityWilsonImplDF | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../CayleyFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../ContinuedFractionFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../DomainWallEOFAFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../MobiusEOFAFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../PartialFractionFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonCloverFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonKernelsInstantiationGparity.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonTMFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | #define IMPLEMENTATION GparityWilsonImplFH | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../CayleyFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../ContinuedFractionFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../DomainWallEOFAFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../MobiusEOFAFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../PartialFractionFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonCloverFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonFermionInstantiation.cc.master | ||||||
| @@ -2,12 +2,14 @@ | |||||||
| 
 | 
 | ||||||
| Grid physics library, www.github.com/paboyle/Grid | Grid physics library, www.github.com/paboyle/Grid | ||||||
| 
 | 
 | ||||||
| Source file: ./lib/qcd/hmc/DDHMC.h | Source file: ./lib/qcd/action/fermion/WilsonKernels.cc | ||||||
| 
 | 
 | ||||||
| Copyright (C) 2021 | Copyright (C) 2015, 2020 | ||||||
| 
 | 
 | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: Christopher Kelly | Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||||
|  | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Nils Meyer <nils.meyer@ur.de> Regensburg University | ||||||
| 
 | 
 | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -26,27 +28,24 @@ with this program; if not, write to the Free Software Foundation, Inc., | |||||||
| See the full license in the file "LICENSE" in the top level distribution | See the full license in the file "LICENSE" in the top level distribution | ||||||
| directory | directory | ||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| 			   /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  | #include <Grid/qcd/action/fermion/FermionCore.h> | ||||||
|  | #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> | ||||||
|  | #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> | ||||||
|  | 
 | ||||||
|  | #ifndef AVX512 | ||||||
|  | #ifndef QPX | ||||||
|  | #ifndef A64FX | ||||||
|  | #ifndef A64FXFIXEDSIZE | ||||||
|  | #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
| ////////////////////////////////////////////////////
 |  | ||||||
| // DDHMC filter with sub-block size B[mu]
 |  | ||||||
| ////////////////////////////////////////////////////
 |  | ||||||
| 
 | 
 | ||||||
| template<typename MomentaField> | #include "impl.h" | ||||||
| struct DDHMCFilter: public MomentumFilterBase<MomentaField> | template class WilsonKernels<IMPLEMENTATION>; | ||||||
| { |  | ||||||
|   Coordinate Block; |  | ||||||
|   int Width; |  | ||||||
|    |  | ||||||
|   DDHMCFilter(const Coordinate &_Block): Block(_Block) {} |  | ||||||
| 
 |  | ||||||
|   void applyFilter(MomentaField &P) const override |  | ||||||
|   { |  | ||||||
|     DomainDecomposition Domains(Block); |  | ||||||
|     Domains.ProjectDDHMC(P); |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
| 
 | 
 | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
| 
 |  | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonTMFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | #define IMPLEMENTATION WilsonImplDF | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../CayleyFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../ContinuedFractionFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../DomainWallEOFAFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../MobiusEOFAFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../PartialFractionFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonCloverFermionInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonFermion5DInstantiation.cc.master | ||||||
| @@ -0,0 +1 @@ | |||||||
|  | ../WilsonFermionInstantiation.cc.master | ||||||
| @@ -2,11 +2,14 @@ | |||||||
| 
 | 
 | ||||||
| Grid physics library, www.github.com/paboyle/Grid | Grid physics library, www.github.com/paboyle/Grid | ||||||
| 
 | 
 | ||||||
| Source file: ./lib/qcd/action/momentum/Domains.h | Source file: ./lib/qcd/action/fermion/WilsonKernels.cc | ||||||
| 
 | 
 | ||||||
| Copyright (C) 2021 | Copyright (C) 2015, 2020 | ||||||
| 
 | 
 | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||||
|  | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Nils Meyer <nils.meyer@ur.de> Regensburg University | ||||||
| 
 | 
 | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -25,15 +28,24 @@ with this program; if not, write to the Free Software Foundation, Inc., | |||||||
| See the full license in the file "LICENSE" in the top level distribution | See the full license in the file "LICENSE" in the top level distribution | ||||||
| directory | directory | ||||||
| *************************************************************************************/ | *************************************************************************************/ | ||||||
| 			   /*  END LEGAL */ | /*  END LEGAL */ | ||||||
|  | #include <Grid/qcd/action/fermion/FermionCore.h> | ||||||
|  | #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> | ||||||
|  | #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> | ||||||
| 
 | 
 | ||||||
| ////////////////////////////////////////////////////
 | #ifndef AVX512 | ||||||
| // Dirichlet filter with sub-block size B[mu]
 | #ifndef QPX | ||||||
| ////////////////////////////////////////////////////
 | #ifndef A64FX | ||||||
| #pragma once  | #ifndef A64FXFIXEDSIZE | ||||||
|  | #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| #include <Grid/qcd/action/domains/DomainDecomposition.h> | NAMESPACE_BEGIN(Grid); | ||||||
| #include <Grid/qcd/action/domains/MomentumFilter.h> |  | ||||||
| #include <Grid/qcd/action/domains/DirichletFilter.h> |  | ||||||
| #include <Grid/qcd/action/domains/DDHMCFilter.h> |  | ||||||
| 
 | 
 | ||||||
|  | #include "impl.h" | ||||||
|  | template class WilsonKernels<IMPLEMENTATION>; | ||||||
|  | 
 | ||||||
|  | NAMESPACE_END(Grid); | ||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user