mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-29 19:14:33 +00:00 
			
		
		
		
	Compare commits
	
		
			14 Commits
		
	
	
		
			feature/dd
			...
			feature/bl
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | b2493d6d25 | ||
|  | 8fd16686dc | ||
|  | 23b9c6b5f5 | ||
|  | 43943008bf | ||
|  | 8ee26f9112 | ||
|  | f91e3af97f | ||
|  | 43298ef681 | ||
|  | 7e70df27e4 | ||
|  | c55d657736 | ||
|  | b89b1280d5 | ||
|  | ac7090e6d3 | ||
|  | 02edbe624f | ||
|  | 9266b89ad8 | ||
|  | 2db7e6f8ab | 
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -10,6 +10,8 @@ | ||||
| *~ | ||||
| *# | ||||
| *.sublime-* | ||||
| .ctags | ||||
| tags | ||||
|  | ||||
| # Precompiled Headers # | ||||
| ####################### | ||||
| @@ -88,7 +90,6 @@ Thumbs.db | ||||
| # build directory # | ||||
| ################### | ||||
| build*/* | ||||
| Documentation/_build | ||||
|  | ||||
| # IDE related files # | ||||
| ##################### | ||||
|   | ||||
| @@ -34,9 +34,6 @@ directory | ||||
|  | ||||
| #if defined __GNUC__ && __GNUC__>=6 | ||||
| #pragma GCC diagnostic ignored "-Wignored-attributes" | ||||
| #endif | ||||
| #if defined __GNUC__ && __GNUC__>=6 | ||||
| #pragma GCC diagnostic ignored "-Wpsabi" | ||||
| #endif | ||||
|  | ||||
|  //disables and intel compiler specific warning (in json.hpp) | ||||
|   | ||||
| @@ -36,7 +36,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/GridCore.h> | ||||
| #include <Grid/qcd/QCD.h> | ||||
| #include <Grid/qcd/spin/Spin.h> | ||||
| #include <Grid/qcd/gparity/Gparity.h> | ||||
| #include <Grid/qcd/utils/Utils.h> | ||||
| #include <Grid/qcd/representations/Representations.h> | ||||
| NAMESPACE_CHECK(GridQCDCore); | ||||
|   | ||||
| @@ -54,7 +54,6 @@ NAMESPACE_CHECK(BiCGSTAB); | ||||
| #include <Grid/algorithms/iterative/SchurRedBlack.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h> | ||||
| #include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h> | ||||
| #include <Grid/algorithms/iterative/BlockConjugateGradient.h> | ||||
| #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h> | ||||
|   | ||||
| @@ -442,8 +442,6 @@ public: | ||||
|     for(int p=0; p<geom.npoint; p++) | ||||
|       points[p] = geom.points_dagger[p]; | ||||
|  | ||||
|     auto points_p = &points[0]; | ||||
|  | ||||
|     RealD* dag_factor_p = &dag_factor[0]; | ||||
|  | ||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||
| @@ -455,7 +453,7 @@ public: | ||||
|       StencilEntry *SE; | ||||
|  | ||||
|       for(int p=0;p<geom_v.npoint;p++){ | ||||
|         int point = points_p[p]; | ||||
|         int point = points[p]; | ||||
|  | ||||
| 	SE=Stencil_v.GetEntry(ptype,point,ss); | ||||
|  | ||||
| @@ -710,8 +708,6 @@ public: | ||||
|     for(int p=0; p<npoint; p++) | ||||
|       points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; | ||||
|  | ||||
|     auto points_p = &points[0]; | ||||
|  | ||||
|     Vector<Aview> AcceleratorViewContainer; | ||||
|     for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); | ||||
|     Aview *Aview_p = & AcceleratorViewContainer[0]; | ||||
| @@ -732,7 +728,7 @@ public: | ||||
|         StencilEntry *SE; | ||||
|  | ||||
|         for(int p=0;p<npoint;p++){ | ||||
|           int point = points_p[p]; | ||||
|           int point = points[p]; | ||||
|           SE=st_v.GetEntry(ptype,point,ss); | ||||
|  | ||||
|           if(SE->_is_local) { | ||||
| @@ -758,7 +754,7 @@ public: | ||||
|         StencilEntry *SE; | ||||
|  | ||||
|         for(int p=0;p<npoint;p++){ | ||||
|           int point = points_p[p]; | ||||
|           int point = points[p]; | ||||
|           SE=st_v.GetEntry(ptype,point,ss); | ||||
|  | ||||
|           if(SE->_is_local) { | ||||
|   | ||||
| @@ -136,7 +136,7 @@ public: | ||||
|     flops=0; | ||||
|     usec =0; | ||||
|     Coordinate layout(Nd,1); | ||||
|     sgrid = new GridCartesian(dimensions,layout,processors,*grid); | ||||
|     sgrid = new GridCartesian(dimensions,layout,processors); | ||||
|   }; | ||||
|      | ||||
|   ~FFT ( void)  { | ||||
| @@ -182,7 +182,7 @@ public: | ||||
|     pencil_gd[dim] = G*processors[dim]; | ||||
|        | ||||
|     // Pencil global vol LxLxGxLxL per node | ||||
|     GridCartesian pencil_g(pencil_gd,layout,processors,*vgrid); | ||||
|     GridCartesian pencil_g(pencil_gd,layout,processors); | ||||
|        | ||||
|     // Construct pencils | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|   | ||||
| @@ -223,14 +223,9 @@ class SchurOperatorBase :  public LinearOperatorBase<Field> { | ||||
|     Mpc(in,tmp); | ||||
|     MpcDag(tmp,out); | ||||
|   } | ||||
|   virtual  void MpcMpcDag(const Field &in, Field &out) { | ||||
|     Field tmp(in.Grid()); | ||||
|     tmp.Checkerboard() = in.Checkerboard(); | ||||
|     MpcDag(in,tmp); | ||||
|     Mpc(tmp,out); | ||||
|   } | ||||
|   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
|     HermOp(in,out); | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|     MpcDagMpc(in,out); | ||||
|     ComplexD dot= innerProduct(in,out);  | ||||
|     n1=real(dot); | ||||
|     n2=norm2(out); | ||||
| @@ -281,16 +276,6 @@ template<class Matrix,class Field> | ||||
|       axpy(out,-1.0,tmp,out); | ||||
|     } | ||||
| }; | ||||
| // Mpc MpcDag system presented as the HermOp | ||||
| template<class Matrix,class Field> | ||||
| class SchurDiagMooeeDagOperator :  public SchurDiagMooeeOperator<Matrix,Field> { | ||||
|  public: | ||||
|   virtual void HermOp(const Field &in, Field &out){ | ||||
|     out.Checkerboard() = in.Checkerboard(); | ||||
|     this->MpcMpcDag(in,out); | ||||
|   } | ||||
|   SchurDiagMooeeDagOperator (Matrix &Mat): SchurDiagMooeeOperator<Matrix,Field>(Mat){}; | ||||
| }; | ||||
| template<class Matrix,class Field> | ||||
|   class SchurDiagOneOperator :  public SchurOperatorBase<Field> { | ||||
|  protected: | ||||
| @@ -545,16 +530,6 @@ public: | ||||
| template<class Field> class LinearFunction { | ||||
| public: | ||||
|   virtual void operator() (const Field &in, Field &out) = 0; | ||||
|  | ||||
|   virtual void operator() (const std::vector<Field> &in, std::vector<Field> &out) | ||||
|   { | ||||
|     assert(in.size() == out.size()); | ||||
|  | ||||
|     for (unsigned int i = 0; i < in.size(); ++i) | ||||
|     { | ||||
|       (*this)(in[i], out[i]); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | ||||
|   | ||||
| @@ -240,12 +240,14 @@ public: | ||||
|     Field T0(grid); T0 = in;   | ||||
|     Field T1(grid);  | ||||
|     Field T2(grid); | ||||
|     Field Tout(grid); | ||||
|     Field y(grid); | ||||
|        | ||||
|     Field *Tnm = &T0; | ||||
|     Field *Tn  = &T1; | ||||
|     Field *Tnp = &T2; | ||||
|  | ||||
|     std::cout << GridLogMessage << "Chebyshev() starts"<<std::endl; | ||||
|     // Tn=T1 = (xscale M + mscale)in | ||||
|     RealD xscale = 2.0/(hi-lo); | ||||
|     RealD mscale = -(hi+lo)/(hi-lo); | ||||
| @@ -254,7 +256,7 @@ public: | ||||
|  | ||||
|     // sum = .5 c[0] T0 + c[1] T1 | ||||
|     //    out = ()*T0 + Coeffs[1]*T1; | ||||
|     axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1); | ||||
|     axpby(Tout,0.5*Coeffs[0],Coeffs[1],T0,T1); | ||||
|     for(int n=2;n<order;n++){ | ||||
|  | ||||
|       Linop.HermOp(*Tn,y); | ||||
| @@ -275,7 +277,7 @@ public: | ||||
|       axpby(y,xscale,mscale,y,(*Tn)); | ||||
|       axpby(*Tnp,2.0,-1.0,y,(*Tnm)); | ||||
|       if ( Coeffs[n] != 0.0) { | ||||
| 	axpy(out,Coeffs[n],*Tnp,out); | ||||
| 	axpy(Tout,Coeffs[n],*Tnp,Tout); | ||||
|       } | ||||
| #endif | ||||
|       // Cycle pointers to avoid copies | ||||
| @@ -285,6 +287,8 @@ public: | ||||
|       Tnp    =swizzle; | ||||
| 	   | ||||
|     } | ||||
|     out = Tout; | ||||
|     std::cout << GridLogMessage << "Chebyshev() ends"<<std::endl; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| @@ -292,7 +296,6 @@ public: | ||||
| template<class Field> | ||||
| class ChebyshevLanczos : public Chebyshev<Field> { | ||||
| private: | ||||
|  | ||||
|   std::vector<RealD> Coeffs; | ||||
|   int order; | ||||
|   RealD alpha; | ||||
| @@ -378,24 +381,26 @@ public: | ||||
|     Field T0(grid); T0 = in;   | ||||
|     Field T1(grid);  | ||||
|     Field T2(grid); | ||||
|     Field Tout(grid); | ||||
|     Field  y(grid); | ||||
|        | ||||
|     Field *Tnm = &T0; | ||||
|     Field *Tn  = &T1; | ||||
|     Field *Tnp = &T2; | ||||
|  | ||||
|     std::cout << GridLogMessage << "ChebyshevLanczos() starts"<<std::endl; | ||||
|     // Tn=T1 = (xscale M )*in | ||||
|     AminusMuSq(Linop,T0,T1); | ||||
|  | ||||
|     // sum = .5 c[0] T0 + c[1] T1 | ||||
|     out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1; | ||||
|     Tout = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1; | ||||
|     for(int n=2;n<order;n++){ | ||||
| 	 | ||||
|       AminusMuSq(Linop,*Tn,y); | ||||
|  | ||||
|       *Tnp=2.0*y-(*Tnm); | ||||
|  | ||||
|       out=out+Coeffs[n]* (*Tnp); | ||||
|       Tout=Tout+Coeffs[n]* (*Tnp); | ||||
|  | ||||
|       // Cycle pointers to avoid copies | ||||
|       Field *swizzle = Tnm; | ||||
| @@ -404,6 +409,8 @@ public: | ||||
|       Tnp    =swizzle; | ||||
| 	   | ||||
|     } | ||||
|     out=Tout; | ||||
|     std::cout << GridLogMessage << "ChebyshevLanczos() ends"<<std::endl; | ||||
|   } | ||||
| }; | ||||
| NAMESPACE_END(Grid); | ||||
|   | ||||
| @@ -102,7 +102,7 @@ public: | ||||
|     // Check if guess is really REALLY good :) | ||||
|     if (cp <= rsq) { | ||||
|       TrueResidual = std::sqrt(a/ssq); | ||||
|       std::cout << GridLogMessage << "ConjugateGradient guess is converged already "<<TrueResidual<< " tol "<< Tolerance<< std::endl; | ||||
|       std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl; | ||||
|       IterationsToComplete = 0;	 | ||||
|       return; | ||||
|     } | ||||
|   | ||||
| @@ -48,29 +48,19 @@ NAMESPACE_BEGIN(Grid); | ||||
|     Integer TotalInnerIterations; //Number of inner CG iterations | ||||
|     Integer TotalOuterIterations; //Number of restarts | ||||
|     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step | ||||
|     RealD TrueResidual; | ||||
|  | ||||
|     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess | ||||
|     LinearFunction<FieldF> *guesser; | ||||
|  | ||||
|     MixedPrecisionConjugateGradient(RealD Tol, | ||||
| 				    Integer maxinnerit,  | ||||
| 				    Integer maxouterit,  | ||||
| 				    GridBase* _sp_grid,  | ||||
| 				    LinearOperatorBase<FieldF> &_Linop_f,  | ||||
| 				    LinearOperatorBase<FieldD> &_Linop_d) : | ||||
|       MixedPrecisionConjugateGradient(Tol, Tol, maxinnerit, maxouterit, _sp_grid, _Linop_f, _Linop_d) {}; | ||||
|  | ||||
|     MixedPrecisionConjugateGradient(RealD Tol, | ||||
| 				    RealD InnerTol, | ||||
|      | ||||
|     MixedPrecisionConjugateGradient(RealD tol,  | ||||
| 				    Integer maxinnerit,  | ||||
| 				    Integer maxouterit,  | ||||
| 				    GridBase* _sp_grid,  | ||||
| 				    LinearOperatorBase<FieldF> &_Linop_f,  | ||||
| 				    LinearOperatorBase<FieldD> &_Linop_d) : | ||||
|       Linop_f(_Linop_f), Linop_d(_Linop_d), | ||||
|       Tolerance(Tol), InnerTolerance(InnerTol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), | ||||
|       OuterLoopNormMult(100.), guesser(NULL){ assert(InnerTol < 1.0e-1);}; | ||||
|       Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), | ||||
|       OuterLoopNormMult(100.), guesser(NULL){ }; | ||||
|  | ||||
|     void useGuesser(LinearFunction<FieldF> &g){ | ||||
|       guesser = &g; | ||||
| @@ -89,11 +79,6 @@ NAMESPACE_BEGIN(Grid); | ||||
|     RealD stop = src_norm * Tolerance*Tolerance; | ||||
|  | ||||
|     GridBase* DoublePrecGrid = src_d_in.Grid(); | ||||
|  | ||||
|     //Generate precision change workspaces | ||||
|     precisionChangeWorkspace wk_dp_from_sp(DoublePrecGrid, SinglePrecGrid); | ||||
|     precisionChangeWorkspace wk_sp_from_dp(SinglePrecGrid, DoublePrecGrid); | ||||
|  | ||||
|     FieldD tmp_d(DoublePrecGrid); | ||||
|     tmp_d.Checkerboard() = cb; | ||||
|      | ||||
| @@ -134,7 +119,7 @@ NAMESPACE_BEGIN(Grid); | ||||
|       while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ?? | ||||
|  | ||||
|       PrecChangeTimer.Start(); | ||||
|       precisionChange(src_f, src_d, wk_sp_from_dp); | ||||
|       precisionChange(src_f, src_d); | ||||
|       PrecChangeTimer.Stop(); | ||||
|        | ||||
|       sol_f = Zero(); | ||||
| @@ -152,7 +137,7 @@ NAMESPACE_BEGIN(Grid); | ||||
|        | ||||
|       //Convert sol back to double and add to double prec solution | ||||
|       PrecChangeTimer.Start(); | ||||
|       precisionChange(tmp_d, sol_f, wk_dp_from_sp); | ||||
|       precisionChange(tmp_d, sol_f); | ||||
|       PrecChangeTimer.Stop(); | ||||
|        | ||||
|       axpy(sol_d, 1.0, tmp_d, sol_d); | ||||
| @@ -164,7 +149,6 @@ NAMESPACE_BEGIN(Grid); | ||||
|     ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations); | ||||
|     CG_d(Linop_d, src_d_in, sol_d); | ||||
|     TotalFinalStepIterations = CG_d.IterationsToComplete; | ||||
|     TrueResidual = CG_d.TrueResidual; | ||||
|  | ||||
|     TotalTimer.Stop(); | ||||
|     std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl; | ||||
|   | ||||
| @@ -52,7 +52,7 @@ public: | ||||
|   MultiShiftFunction shifts; | ||||
|   std::vector<RealD> TrueResidualShift; | ||||
|  | ||||
|   ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) :  | ||||
|   ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :  | ||||
|     MaxIterations(maxit), | ||||
|     shifts(_shifts) | ||||
|   {  | ||||
| @@ -182,9 +182,6 @@ public: | ||||
|     for(int s=0;s<nshift;s++) { | ||||
|       axpby(psi[s],0.,-bs[s]*alpha[s],src,src); | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl; | ||||
|      | ||||
|    | ||||
|   /////////////////////////////////////// | ||||
|   // Timers | ||||
|   | ||||
| @@ -1,411 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Christopher Kelly <ckelly@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H | ||||
| #define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| //CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.  | ||||
| //The residual is stored in single precision, but the search directions and solution are stored in double precision.  | ||||
| //Every update_freq iterations the residual is corrected in double precision.  | ||||
|      | ||||
| //For safety the a final regular CG is applied to clean up if necessary | ||||
|  | ||||
| //Linop to add shift to input linop, used in cleanup CG | ||||
| namespace ConjugateGradientMultiShiftMixedPrecSupport{ | ||||
| template<typename Field> | ||||
| class ShiftedLinop: public LinearOperatorBase<Field>{ | ||||
| public: | ||||
|   LinearOperatorBase<Field> &linop_base; | ||||
|   RealD shift; | ||||
|  | ||||
|   ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){} | ||||
|  | ||||
|   void OpDiag (const Field &in, Field &out){ assert(0); } | ||||
|   void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); } | ||||
|   void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } | ||||
|    | ||||
|   void Op     (const Field &in, Field &out){ assert(0); } | ||||
|   void AdjOp  (const Field &in, Field &out){ assert(0); } | ||||
|  | ||||
|   void HermOp(const Field &in, Field &out){ | ||||
|     linop_base.HermOp(in, out); | ||||
|     axpy(out, shift, in, out); | ||||
|   }     | ||||
|  | ||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
|     HermOp(in,out); | ||||
|     ComplexD dot = innerProduct(in,out); | ||||
|     n1=real(dot); | ||||
|     n2=norm2(out); | ||||
|   } | ||||
| }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<class FieldD, class FieldF, | ||||
| 	 typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, | ||||
| 	 typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>  | ||||
| class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>, | ||||
| 					     public OperatorFunction<FieldD> | ||||
| { | ||||
| public:                                                 | ||||
|  | ||||
|   using OperatorFunction<FieldD>::operator(); | ||||
|  | ||||
|   RealD   Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|   std::vector<int> IterationsToCompleteShift;  // Iterations for this shift | ||||
|   int verbose; | ||||
|   MultiShiftFunction shifts; | ||||
|   std::vector<RealD> TrueResidualShift; | ||||
|  | ||||
|   int ReliableUpdateFreq; //number of iterations between reliable updates | ||||
|  | ||||
|   GridBase* SinglePrecGrid; //Grid for single-precision fields | ||||
|   LinearOperatorBase<FieldF> &Linop_f; //single precision | ||||
|  | ||||
|   ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts, | ||||
| 				       GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f, | ||||
| 				       int _ReliableUpdateFreq | ||||
| 				       ) :  | ||||
|     MaxIterations(maxit),  shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq) | ||||
|   {  | ||||
|     verbose=1; | ||||
|     IterationsToCompleteShift.resize(_shifts.order); | ||||
|     TrueResidualShift.resize(_shifts.order); | ||||
|   } | ||||
|  | ||||
|   void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi) | ||||
|   { | ||||
|     GridBase *grid = src.Grid(); | ||||
|     int nshift = shifts.order; | ||||
|     std::vector<FieldD> results(nshift,grid); | ||||
|     (*this)(Linop,src,results,psi); | ||||
|   } | ||||
|   void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi) | ||||
|   { | ||||
|     int nshift = shifts.order; | ||||
|  | ||||
|     (*this)(Linop,src,results); | ||||
|    | ||||
|     psi = shifts.norm*src; | ||||
|     for(int i=0;i<nshift;i++){ | ||||
|       psi = psi + shifts.residues[i]*results[i]; | ||||
|     } | ||||
|  | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d) | ||||
|   {  | ||||
|     GridBase *DoublePrecGrid = src_d.Grid(); | ||||
|     precisionChangeWorkspace wk_f_from_d(SinglePrecGrid, DoublePrecGrid); | ||||
|     precisionChangeWorkspace wk_d_from_f(DoublePrecGrid, SinglePrecGrid); | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////// | ||||
|     // Convenience references to the info stored in "MultiShiftFunction" | ||||
|     //////////////////////////////////////////////////////////////////////// | ||||
|     int nshift = shifts.order; | ||||
|  | ||||
|     std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts" | ||||
|     std::vector<RealD> &mresidual(shifts.tolerances); | ||||
|     std::vector<RealD> alpha(nshift,1.0); | ||||
|  | ||||
|     //Double precision search directions | ||||
|     FieldD p_d(DoublePrecGrid); | ||||
|     std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision) | ||||
|  | ||||
|     FieldD tmp_d(DoublePrecGrid); | ||||
|     FieldD r_d(DoublePrecGrid); | ||||
|     FieldD mmp_d(DoublePrecGrid); | ||||
|  | ||||
|     assert(psi_d.size()==nshift); | ||||
|     assert(mass.size()==nshift); | ||||
|     assert(mresidual.size()==nshift); | ||||
|    | ||||
|     // dynamic sized arrays on stack; 2d is a pain with vector | ||||
|     RealD  bs[nshift]; | ||||
|     RealD  rsq[nshift]; | ||||
|     RealD  z[nshift][2]; | ||||
|     int     converged[nshift]; | ||||
|    | ||||
|     const int       primary =0; | ||||
|    | ||||
|     //Primary shift fields CG iteration | ||||
|     RealD a,b,c,d; | ||||
|     RealD cp,bp,qq; //prev | ||||
|    | ||||
|     // Matrix mult fields | ||||
|     FieldF r_f(SinglePrecGrid); | ||||
|     FieldF p_f(SinglePrecGrid); | ||||
|     FieldF tmp_f(SinglePrecGrid); | ||||
|     FieldF mmp_f(SinglePrecGrid); | ||||
|     FieldF src_f(SinglePrecGrid); | ||||
|     precisionChange(src_f, src_d, wk_f_from_d); | ||||
|  | ||||
|     // Check lightest mass | ||||
|     for(int s=0;s<nshift;s++){ | ||||
|       assert( mass[s]>= mass[primary] ); | ||||
|       converged[s]=0; | ||||
|     } | ||||
|    | ||||
|     // Wire guess to zero | ||||
|     // Residuals "r" are src | ||||
|     // First search direction "p" is also src | ||||
|     cp = norm2(src_d); | ||||
|  | ||||
|     // Handle trivial case of zero src. | ||||
|     if( cp == 0. ){ | ||||
|       for(int s=0;s<nshift;s++){ | ||||
| 	psi_d[s] = Zero(); | ||||
| 	IterationsToCompleteShift[s] = 1; | ||||
| 	TrueResidualShift[s] = 0.; | ||||
|       } | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     for(int s=0;s<nshift;s++){ | ||||
|       rsq[s] = cp * mresidual[s] * mresidual[s]; | ||||
|       std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl; | ||||
|       ps_d[s] = src_d; | ||||
|     } | ||||
|     // r and p for primary | ||||
|     r_f=src_f; //residual maintained in single | ||||
|     p_f=src_f; | ||||
|     p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys | ||||
|    | ||||
|     //MdagM+m[0] | ||||
|     Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp) | ||||
|     axpy(mmp_f,mass[0],p_f,mmp_f); | ||||
|     RealD rn = norm2(p_f); | ||||
|     d += rn*mass[0]; | ||||
|  | ||||
|     b = -cp /d; | ||||
|    | ||||
|     // Set up the various shift variables | ||||
|     int       iz=0; | ||||
|     z[0][1-iz] = 1.0; | ||||
|     z[0][iz]   = 1.0; | ||||
|     bs[0]      = b; | ||||
|     for(int s=1;s<nshift;s++){ | ||||
|       z[s][1-iz] = 1.0; | ||||
|       z[s][iz]   = 1.0/( 1.0 - b*(mass[s]-mass[0])); | ||||
|       bs[s]      = b*z[s][iz];  | ||||
|     } | ||||
|    | ||||
|     // r += b[0] A.p[0] | ||||
|     // c= norm(r) | ||||
|     c=axpy_norm(r_f,b,mmp_f,r_f); | ||||
|    | ||||
|     for(int s=0;s<nshift;s++) { | ||||
|       axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d); | ||||
|     } | ||||
|    | ||||
|     /////////////////////////////////////// | ||||
|     // Timers | ||||
|     /////////////////////////////////////// | ||||
|     GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer; | ||||
|  | ||||
|     SolverTimer.Start(); | ||||
|    | ||||
|     // Iteration loop | ||||
|     int k; | ||||
|    | ||||
|     for (k=1;k<=MaxIterations;k++){     | ||||
|       a = c /cp; | ||||
|  | ||||
|       //Update double precision search direction by residual | ||||
|       PrecChangeTimer.Start(); | ||||
|       precisionChange(r_d, r_f, wk_d_from_f); | ||||
|       PrecChangeTimer.Stop(); | ||||
|  | ||||
|       AXPYTimer.Start(); | ||||
|       axpy(p_d,a,p_d,r_d);  | ||||
|  | ||||
|       for(int s=0;s<nshift;s++){ | ||||
| 	if ( ! converged[s] ) {  | ||||
| 	  if (s==0){ | ||||
| 	    axpy(ps_d[s],a,ps_d[s],r_d); | ||||
| 	  } else{ | ||||
| 	    RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b); | ||||
| 	    axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]); | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|       AXPYTimer.Stop(); | ||||
|  | ||||
|       PrecChangeTimer.Start(); | ||||
|       precisionChange(p_f, p_d, wk_f_from_d); //get back single prec search direction for linop | ||||
|       PrecChangeTimer.Stop(); | ||||
|  | ||||
|       cp=c; | ||||
|       MatrixTimer.Start();   | ||||
|       Linop_f.HermOp(p_f,mmp_f);  | ||||
|       d=real(innerProduct(p_f,mmp_f));     | ||||
|       MatrixTimer.Stop();   | ||||
|  | ||||
|       AXPYTimer.Start(); | ||||
|       axpy(mmp_f,mass[0],p_f,mmp_f); | ||||
|       AXPYTimer.Stop(); | ||||
|       RealD rn = norm2(p_f); | ||||
|       d += rn*mass[0]; | ||||
|      | ||||
|       bp=b; | ||||
|       b=-cp/d; | ||||
|      | ||||
|       // Toggle the recurrence history | ||||
|       bs[0] = b; | ||||
|       iz = 1-iz; | ||||
|       ShiftTimer.Start(); | ||||
|       for(int s=1;s<nshift;s++){ | ||||
| 	if((!converged[s])){ | ||||
| 	  RealD z0 = z[s][1-iz]; | ||||
| 	  RealD z1 = z[s][iz]; | ||||
| 	  z[s][iz] = z0*z1*bp | ||||
| 	    / (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));  | ||||
| 	  bs[s] = b*z[s][iz]/z0; // NB sign  rel to Mike | ||||
| 	} | ||||
|       } | ||||
|       ShiftTimer.Stop(); | ||||
|  | ||||
|       //Update double precision solutions | ||||
|       AXPYTimer.Start(); | ||||
|       for(int s=0;s<nshift;s++){ | ||||
| 	int ss = s; | ||||
| 	if( (!converged[s]) ) {  | ||||
| 	  axpy(psi_d[ss],-bs[s]*alpha[s],ps_d[s],psi_d[ss]); | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       //Perform reliable update if necessary; otherwise update residual from single-prec mmp | ||||
|       RealD c_f = axpy_norm(r_f,b,mmp_f,r_f); | ||||
|       AXPYTimer.Stop(); | ||||
|  | ||||
|       c = c_f; | ||||
|  | ||||
|       if(k % ReliableUpdateFreq == 0){ | ||||
| 	//Replace r with true residual | ||||
| 	MatrixTimer.Start();   | ||||
| 	Linop_d.HermOp(psi_d[0],mmp_d);  | ||||
| 	MatrixTimer.Stop();   | ||||
|  | ||||
| 	AXPYTimer.Start(); | ||||
| 	axpy(mmp_d,mass[0],psi_d[0],mmp_d); | ||||
|  | ||||
| 	RealD c_d = axpy_norm(r_d, -1.0, mmp_d, src_d); | ||||
| 	AXPYTimer.Stop(); | ||||
|  | ||||
| 	std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_f <<" with |r|^2 = "<<c_d<<std::endl; | ||||
| 	 | ||||
| 	PrecChangeTimer.Start(); | ||||
| 	precisionChange(r_f, r_d, wk_f_from_d); | ||||
| 	PrecChangeTimer.Stop(); | ||||
| 	c = c_d; | ||||
|       } | ||||
|      | ||||
|       // Convergence checks | ||||
|       int all_converged = 1; | ||||
|       for(int s=0;s<nshift;s++){ | ||||
|        | ||||
| 	if ( (!converged[s]) ){ | ||||
| 	  IterationsToCompleteShift[s] = k; | ||||
| 	 | ||||
| 	  RealD css  = c * z[s][iz]* z[s][iz]; | ||||
| 	 | ||||
| 	  if(css<rsq[s]){ | ||||
| 	    if ( ! converged[s] ) | ||||
| 	      std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl; | ||||
| 	    converged[s]=1; | ||||
| 	  } else { | ||||
| 	    all_converged=0; | ||||
| 	  } | ||||
|  | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       if ( all_converged ){ | ||||
|  | ||||
| 	SolverTimer.Stop(); | ||||
| 	std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl; | ||||
| 	std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl; | ||||
|        | ||||
| 	// Check answers  | ||||
| 	for(int s=0; s < nshift; s++) {  | ||||
| 	  Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq); | ||||
| 	  axpy(tmp_d,mass[s],psi_d[s],mmp_d); | ||||
| 	  axpy(r_d,-alpha[s],src_d,tmp_d); | ||||
| 	  RealD rn = norm2(r_d); | ||||
| 	  RealD cn = norm2(src_d); | ||||
| 	  TrueResidualShift[s] = std::sqrt(rn/cn); | ||||
| 	  std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl; | ||||
|  | ||||
| 	  //If we have not reached the desired tolerance, do a (mixed precision) CG cleanup | ||||
| 	  if(rn >= rsq[s]){ | ||||
| 	    CleanupTimer.Start(); | ||||
| 	    std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl; | ||||
|  | ||||
| 	    //Setup linear operators for final cleanup | ||||
| 	    ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]); | ||||
| 	    ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]); | ||||
| 					        | ||||
| 	    MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);  | ||||
| 	    cg(src_d, psi_d[s]); | ||||
| 	     | ||||
| 	    TrueResidualShift[s] = cg.TrueResidual; | ||||
| 	    CleanupTimer.Stop(); | ||||
| 	  } | ||||
| 	} | ||||
|  | ||||
| 	std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tSolver    " << SolverTimer.Elapsed()     <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\t\tAXPY    " << AXPYTimer.Elapsed()     <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\t\tMatrix    " << MatrixTimer.Elapsed()     <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\t\tShift    " << ShiftTimer.Elapsed()     <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed()     <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed()     <<std::endl; | ||||
| 	std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl; | ||||
|  | ||||
| 	IterationsToComplete = k;	 | ||||
|  | ||||
| 	return; | ||||
|       } | ||||
|  | ||||
|     | ||||
|     } | ||||
|     // ugly hack | ||||
|     std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl; | ||||
|     //  assert(0); | ||||
|   } | ||||
|  | ||||
| }; | ||||
| NAMESPACE_END(Grid); | ||||
| #endif | ||||
| @@ -54,23 +54,15 @@ class DeflatedGuesser: public LinearFunction<Field> { | ||||
| private: | ||||
|   const std::vector<Field> &evec; | ||||
|   const std::vector<RealD> &eval; | ||||
|   const unsigned int       N; | ||||
|  | ||||
| public: | ||||
|  | ||||
|   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) | ||||
|   : DeflatedGuesser(_evec, _eval, _evec.size()) | ||||
|   {} | ||||
|  | ||||
|   DeflatedGuesser(const std::vector<Field> & _evec, const std::vector<RealD> & _eval, const unsigned int _N) | ||||
|   : evec(_evec), eval(_eval), N(_N) | ||||
|   { | ||||
|     assert(evec.size()==eval.size()); | ||||
|     assert(N <= evec.size()); | ||||
|   }  | ||||
|   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; | ||||
|  | ||||
|   virtual void operator()(const Field &src,Field &guess) { | ||||
|     guess = Zero(); | ||||
|     assert(evec.size()==eval.size()); | ||||
|     auto N = evec.size(); | ||||
|     for (int i=0;i<N;i++) { | ||||
|       const Field& tmp = evec[i]; | ||||
|       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); | ||||
|   | ||||
							
								
								
									
										1555
									
								
								Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1555
									
								
								Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -40,7 +40,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|    *        (-MoeMee^{-1}   1 )    | ||||
|    * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} ) | ||||
|    *           ( 0       1                    ) | ||||
|    * L^{-dag}= ( 1      -Mee^{-dag} Moe^{dag} ) | ||||
|    * L^{-d}  = ( 1      -Mee^{-dag} Moe^{dag} ) | ||||
|    *           ( 0       1                    ) | ||||
|    * | ||||
|    * U^-1 = (1   -Mee^{-1} Meo) | ||||
| @@ -82,8 +82,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|    * c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1}  eta_o | ||||
|    *                              eta_o'     = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e) | ||||
|    *                              psi_o = M_oo^-1 phi_o | ||||
|    * | ||||
|    * | ||||
|    * TODO: Deflation  | ||||
|    */ | ||||
| namespace Grid { | ||||
|  | ||||
| @@ -98,7 +97,6 @@ namespace Grid { | ||||
|   protected: | ||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||
|     OperatorFunction<Field> & _HermitianRBSolver; | ||||
|      | ||||
|     int CBfactorise; | ||||
|     bool subGuess; | ||||
|     bool useSolnAsInitGuess; // if true user-supplied solution vector is used as initial guess for solver | ||||
| @@ -134,31 +132,6 @@ namespace Grid { | ||||
|       (*this)(_Matrix,in,out,guess); | ||||
|     } | ||||
|  | ||||
|     void RedBlackSource(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &src_o)  | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       Field tmp(grid); | ||||
|       int nblock = in.size(); | ||||
|       for(int b=0;b<nblock;b++){ | ||||
| 	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||
|       } | ||||
|     } | ||||
|     // James can write his own deflated guesser | ||||
|     // with optimised code for the inner products | ||||
|     //    RedBlackSolveSplitGrid(); | ||||
|     //    RedBlackSolve(_Matrix,src_o,sol_o);  | ||||
|  | ||||
|     void RedBlackSolution(Matrix &_Matrix, const std::vector<Field> &in, const std::vector<Field> &sol_o, std::vector<Field> &out) | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       Field tmp(grid); | ||||
|       int nblock = in.size(); | ||||
|       for(int b=0;b<nblock;b++) { | ||||
| 	pickCheckerboard(Even,tmp,in[b]); | ||||
| 	RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     template<class Guesser> | ||||
|     void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)  | ||||
|     { | ||||
| @@ -177,29 +150,24 @@ namespace Grid { | ||||
|       //////////////////////////////////////////////// | ||||
|       // Prepare RedBlack source | ||||
|       //////////////////////////////////////////////// | ||||
|       RedBlackSource(_Matrix,in,src_o); | ||||
| 	//      for(int b=0;b<nblock;b++){ | ||||
| 	//	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||
| 	//      } | ||||
|        | ||||
|       for(int b=0;b<nblock;b++){ | ||||
| 	RedBlackSource(_Matrix,in[b],tmp,src_o[b]); | ||||
|       } | ||||
|       //////////////////////////////////////////////// | ||||
|       // Make the guesses | ||||
|       //////////////////////////////////////////////// | ||||
|       if ( subGuess ) guess_save.resize(nblock,grid); | ||||
|  | ||||
|        | ||||
|       if(useSolnAsInitGuess) { | ||||
|         for(int b=0;b<nblock;b++){ | ||||
|       for(int b=0;b<nblock;b++){ | ||||
|         if(useSolnAsInitGuess) { | ||||
|           pickCheckerboard(Odd, sol_o[b], out[b]); | ||||
|         } else { | ||||
|           guess(src_o[b],sol_o[b]);  | ||||
|         } | ||||
|       } else { | ||||
|         guess(src_o, sol_o);  | ||||
|       } | ||||
|  | ||||
| 	    if ( subGuess ) {  | ||||
|         for(int b=0;b<nblock;b++){ | ||||
|           guess_save[b] = sol_o[b]; | ||||
|         } | ||||
| 	if ( subGuess ) {  | ||||
| 	  guess_save[b] = sol_o[b]; | ||||
| 	} | ||||
|       } | ||||
|       ////////////////////////////////////////////////////////////// | ||||
|       // Call the block solver | ||||
| @@ -221,20 +189,13 @@ namespace Grid { | ||||
| 	///////////////////////////////////////////////// | ||||
| 	// Check unprec residual if possible | ||||
| 	///////////////////////////////////////////////// | ||||
| 	if ( ! subGuess ) {	   | ||||
|  | ||||
| 	  if ( this->adjoint() ) _Matrix.Mdag(out[b],resid);  | ||||
| 	  else                   _Matrix.M(out[b],resid);  | ||||
|  | ||||
| 	if ( ! subGuess ) { | ||||
| 	  _Matrix.M(out[b],resid);  | ||||
| 	  resid = resid-in[b]; | ||||
| 	  RealD ns = norm2(in[b]); | ||||
| 	  RealD nr = norm2(resid); | ||||
| 	 | ||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint "<< this->adjoint() << std::endl; | ||||
| 	  if ( this->adjoint() )  | ||||
| 	    std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | ||||
| 	  else                    | ||||
| 	    std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | ||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl; | ||||
| 	} else { | ||||
| 	  std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl; | ||||
| 	} | ||||
| @@ -288,21 +249,12 @@ namespace Grid { | ||||
|  | ||||
|       // Verify the unprec residual | ||||
|       if ( ! subGuess ) { | ||||
|  | ||||
| 	std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint "<< this->adjoint() << std::endl; | ||||
| 	 | ||||
| 	if ( this->adjoint() ) _Matrix.Mdag(out,resid);  | ||||
| 	else                   _Matrix.M(out,resid);  | ||||
|  | ||||
|         _Matrix.M(out,resid);  | ||||
|         resid = resid-in; | ||||
|         RealD ns = norm2(in); | ||||
|         RealD nr = norm2(resid); | ||||
|  | ||||
| 	  if ( this->adjoint() )  | ||||
| 	    std::cout<<GridLogMessage<< "SchurRedBlackBase adjoint solver true unprec resid "<<std::sqrt(nr/ns) << std::endl; | ||||
| 	  else                    | ||||
| 	    std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid "<<std::sqrt(nr/ns) << std::endl; | ||||
|  | ||||
|         std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl; | ||||
|       } else { | ||||
|         std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl; | ||||
|       } | ||||
| @@ -311,7 +263,6 @@ namespace Grid { | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Override in derived.  | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     virtual bool adjoint(void) { return false; } | ||||
|     virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0; | ||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0; | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o)                           =0; | ||||
| @@ -665,127 +616,6 @@ namespace Grid { | ||||
|         this->_HermitianRBSolver(_OpEO, src_o, sol_o);  | ||||
|       } | ||||
|   }; | ||||
|  | ||||
|   /* | ||||
|    * Red black Schur decomposition | ||||
|    * | ||||
|    *  M = (Mee Meo) =  (1             0 )   (Mee   0               )  (1 Mee^{-1} Meo) | ||||
|    *      (Moe Moo)    (Moe Mee^-1    1 )   (0   Moo-Moe Mee^-1 Meo)  (0   1         ) | ||||
|    *                =         L                     D                     U | ||||
|    * | ||||
|    * L^-1 = (1              0 ) | ||||
|    *        (-MoeMee^{-1}   1 )    | ||||
|    * L^{dag} = ( 1       Mee^{-dag} Moe^{dag} ) | ||||
|    *           ( 0       1                    ) | ||||
|    * | ||||
|    * U^-1 = (1   -Mee^{-1} Meo) | ||||
|    *        (0    1           ) | ||||
|    * U^{dag} = ( 1                 0) | ||||
|    *           (Meo^dag Mee^{-dag} 1) | ||||
|    * U^{-dag} = (  1                 0) | ||||
|    *            (-Meo^dag Mee^{-dag} 1) | ||||
|    * | ||||
|    * | ||||
|    *********************** | ||||
|    *     M^dag psi = eta | ||||
|    *********************** | ||||
|    * | ||||
|    * Really for Mobius: (Wilson - easier to just use gamma 5 hermiticity) | ||||
|    * | ||||
|    *    Mdag psi     =         Udag  Ddag  Ldag psi = eta | ||||
|    * | ||||
|    * U^{-dag} = (  1                 0) | ||||
|    *            (-Meo^dag Mee^{-dag} 1) | ||||
|    * | ||||
|    * | ||||
|    * i)                D^dag phi =  (U^{-dag}  eta) | ||||
|    *                        eta'_e = eta_e | ||||
|    *                        eta'_o = (eta_o - Meo^dag Mee^{-dag} eta_e) | ||||
|    *  | ||||
|    *      phi_o = D_oo^-dag eta'_o = D_oo^-dag (eta_o - Meo^dag Mee^{-dag} eta_e) | ||||
|    * | ||||
|    *      phi_e = D_ee^-dag eta'_e = D_ee^-dag eta_e | ||||
|    *  | ||||
|    * Solve:  | ||||
|    * | ||||
|    *      D_oo D_oo^dag phi_o = D_oo (eta_o - Meo^dag Mee^{-dag} eta_e) | ||||
|    * | ||||
|    * ii)  | ||||
|    *      phi = L^dag psi => psi = L^-dag phi.  | ||||
|    * | ||||
|    * L^{-dag} = ( 1      -Mee^{-dag} Moe^{dag} ) | ||||
|    *            ( 0       1                    ) | ||||
|    * | ||||
|    *   => sol_e = M_ee^-dag * ( src_e - Moe^dag phi_o )... | ||||
|    *   => sol_o = phi_o | ||||
|    */ | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Site diagonal has Mooee on it, but solve the Adjoint system | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class Field> class SchurRedBlackDiagMooeeDagSolve : public SchurRedBlackBase<Field> { | ||||
|   public: | ||||
|     typedef CheckerBoardedSparseMatrixBase<Field> Matrix; | ||||
|  | ||||
|     virtual bool adjoint(void) { return true; } | ||||
|     SchurRedBlackDiagMooeeDagSolve(OperatorFunction<Field> &HermitianRBSolver, | ||||
| 				   const bool initSubGuess = false, | ||||
| 				   const bool _solnAsInitGuess = false)   | ||||
|       : SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess,_solnAsInitGuess) {}; | ||||
|  | ||||
|     ////////////////////////////////////////////////////// | ||||
|     // Override RedBlack specialisation | ||||
|     ////////////////////////////////////////////////////// | ||||
|     virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       Field   tmp(grid); | ||||
|       Field  Mtmp(grid); | ||||
|  | ||||
|       pickCheckerboard(Even,src_e,src); | ||||
|       pickCheckerboard(Odd ,src_o,src); | ||||
|       ///////////////////////////////////////////////////// | ||||
|       // src_o = (source_o - Moe^dag MeeInvDag source_e) | ||||
|       ///////////////////////////////////////////////////// | ||||
|       _Matrix.MooeeInvDag(src_e,tmp);  assert(  tmp.Checkerboard() ==Even); | ||||
|       _Matrix.MeooeDag   (tmp,Mtmp);   assert( Mtmp.Checkerboard() ==Odd);      | ||||
|       tmp=src_o-Mtmp;                  assert(  tmp.Checkerboard() ==Odd);      | ||||
|  | ||||
|       // get the right Mpc | ||||
|       SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       _HermOpEO.Mpc(tmp,src_o);     assert(src_o.Checkerboard() ==Odd); | ||||
|     } | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const Field &src_o, Field &sol_o) | ||||
|     { | ||||
|       SchurDiagMooeeDagOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);  | ||||
|     }; | ||||
|     virtual void RedBlackSolve   (Matrix & _Matrix,const std::vector<Field> &src_o,  std::vector<Field> &sol_o) | ||||
|     { | ||||
|       SchurDiagMooeeDagOperator<Matrix,Field> _HermOpEO(_Matrix); | ||||
|       this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); | ||||
|     } | ||||
|     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) | ||||
|     { | ||||
|       GridBase *grid = _Matrix.RedBlackGrid(); | ||||
|       GridBase *fgrid= _Matrix.Grid(); | ||||
|  | ||||
|       Field  sol_e(grid); | ||||
|       Field  tmp(grid); | ||||
|       /////////////////////////////////////////////////// | ||||
|       // sol_e = M_ee^-dag * ( src_e - Moe^dag phi_o )... | ||||
|       // sol_o = phi_o | ||||
|       /////////////////////////////////////////////////// | ||||
|       _Matrix.MeooeDag(sol_o,tmp);      assert(tmp.Checkerboard()==Even); | ||||
|       tmp = src_e-tmp;                  assert(tmp.Checkerboard()==Even); | ||||
|       _Matrix.MooeeInvDag(tmp,sol_e);   assert(sol_e.Checkerboard()==Even); | ||||
|        | ||||
|       setCheckerboard(sol,sol_e); assert(  sol_e.Checkerboard() ==Even); | ||||
|       setCheckerboard(sol,sol_o); assert(  sol_o.Checkerboard() ==Odd ); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -9,30 +9,14 @@ NAMESPACE_BEGIN(Grid); | ||||
| #define AccSmall (3) | ||||
| #define Shared   (4) | ||||
| #define SharedSmall (5) | ||||
| #undef GRID_MM_VERBOSE  | ||||
| uint64_t total_shared; | ||||
| uint64_t total_device; | ||||
| uint64_t total_host;; | ||||
| void MemoryManager::PrintBytes(void) | ||||
| { | ||||
|   std::cout << " MemoryManager : ------------------------------------ "<<std::endl; | ||||
|   std::cout << " MemoryManager : PrintBytes "<<std::endl; | ||||
|   std::cout << " MemoryManager : ------------------------------------ "<<std::endl; | ||||
|   std::cout << " MemoryManager : "<<(total_shared>>20)<<" shared      Mbytes "<<std::endl; | ||||
|   std::cout << " MemoryManager : "<<(total_device>>20)<<" accelerator Mbytes "<<std::endl; | ||||
|   std::cout << " MemoryManager : "<<(total_host>>20)  <<" cpu         Mbytes "<<std::endl; | ||||
|   uint64_t cacheBytes; | ||||
|   cacheBytes = CacheBytes[Cpu]; | ||||
|   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" cpu cache Mbytes "<<std::endl; | ||||
|   cacheBytes = CacheBytes[Acc]; | ||||
|   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" acc cache Mbytes "<<std::endl; | ||||
|   cacheBytes = CacheBytes[Shared]; | ||||
|   std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" shared cache Mbytes "<<std::endl; | ||||
|    | ||||
| #ifdef GRID_CUDA | ||||
|   cuda_mem(); | ||||
| #endif | ||||
|    | ||||
|   std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl; | ||||
|   std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl; | ||||
|   std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl; | ||||
| } | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////// | ||||
| @@ -40,114 +24,88 @@ void MemoryManager::PrintBytes(void) | ||||
| ////////////////////////////////////////////////////////////////////// | ||||
| MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; | ||||
| int MemoryManager::Victim[MemoryManager::NallocType]; | ||||
| int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 }; | ||||
| uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType]; | ||||
| int MemoryManager::Ncache[MemoryManager::NallocType] = { 8, 32, 8, 32, 8, 32 }; | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////// | ||||
| // Actual allocation and deallocation utils | ||||
| ////////////////////////////////////////////////////////////////////// | ||||
| void *MemoryManager::AcceleratorAllocate(size_t bytes) | ||||
| { | ||||
|   total_device+=bytes; | ||||
|   void *ptr = (void *) Lookup(bytes,Acc); | ||||
|   if ( ptr == (void *) NULL ) { | ||||
|     ptr = (void *) acceleratorAllocDevice(bytes); | ||||
|     total_device+=bytes; | ||||
|   } | ||||
| #ifdef GRID_MM_VERBOSE | ||||
|   std::cout <<"AcceleratorAllocate "<<std::endl; | ||||
|   PrintBytes(); | ||||
| #endif | ||||
|   return ptr; | ||||
| } | ||||
| void  MemoryManager::AcceleratorFree    (void *ptr,size_t bytes) | ||||
| { | ||||
|   total_device-=bytes; | ||||
|   void *__freeme = Insert(ptr,bytes,Acc); | ||||
|   if ( __freeme ) { | ||||
|     acceleratorFreeDevice(__freeme); | ||||
|     total_device-=bytes; | ||||
| //       PrintBytes(); | ||||
|   } | ||||
| #ifdef GRID_MM_VERBOSE | ||||
|   std::cout <<"AcceleratorFree "<<std::endl; | ||||
|   PrintBytes(); | ||||
| #endif | ||||
| } | ||||
| void *MemoryManager::SharedAllocate(size_t bytes) | ||||
| { | ||||
|   total_shared+=bytes; | ||||
|   void *ptr = (void *) Lookup(bytes,Shared); | ||||
|   if ( ptr == (void *) NULL ) { | ||||
|     ptr = (void *) acceleratorAllocShared(bytes); | ||||
|     total_shared+=bytes; | ||||
|         std::cout <<GridLogMessage<<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl; | ||||
| //        PrintBytes(); | ||||
|   } | ||||
| #ifdef GRID_MM_VERBOSE | ||||
|   std::cout <<"SharedAllocate "<<std::endl; | ||||
|   PrintBytes(); | ||||
| #endif | ||||
|   return ptr; | ||||
| } | ||||
| void  MemoryManager::SharedFree    (void *ptr,size_t bytes) | ||||
| { | ||||
|   total_shared-=bytes; | ||||
|   void *__freeme = Insert(ptr,bytes,Shared); | ||||
|   if ( __freeme ) { | ||||
|     acceleratorFreeShared(__freeme); | ||||
|     total_shared-=bytes; | ||||
|     //    PrintBytes(); | ||||
|   } | ||||
| #ifdef GRID_MM_VERBOSE | ||||
|   std::cout <<"SharedFree "<<std::endl; | ||||
|   PrintBytes(); | ||||
| #endif | ||||
| } | ||||
| #ifdef GRID_UVM | ||||
| void *MemoryManager::CpuAllocate(size_t bytes) | ||||
| { | ||||
|   total_host+=bytes; | ||||
|   void *ptr = (void *) Lookup(bytes,Cpu); | ||||
|   if ( ptr == (void *) NULL ) { | ||||
|     ptr = (void *) acceleratorAllocShared(bytes); | ||||
|     total_host+=bytes; | ||||
| //    std::cout << GridLogMessage<< "MemoryManager:: CpuAllocate  total_host= "<<total_host<<" "<< ptr << std::endl; | ||||
|   } | ||||
| #ifdef GRID_MM_VERBOSE | ||||
|   std::cout <<"CpuAllocate "<<std::endl; | ||||
|   PrintBytes(); | ||||
| #endif | ||||
|   return ptr; | ||||
| } | ||||
| void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | ||||
| { | ||||
|   total_host-=bytes; | ||||
|   NotifyDeletion(_ptr); | ||||
|   void *__freeme = Insert(_ptr,bytes,Cpu); | ||||
|   if ( __freeme ) {  | ||||
|     acceleratorFreeShared(__freeme); | ||||
| //    std::cout << GridLogMessage<< "MemoryManager:: CpuFree  total_host= "<<total_host<<" "<< __freeme << std::endl; | ||||
|     total_host-=bytes; | ||||
|   } | ||||
| #ifdef GRID_MM_VERBOSE | ||||
|   std::cout <<"CpuFree "<<std::endl; | ||||
|   PrintBytes(); | ||||
| #endif | ||||
| } | ||||
| #else | ||||
| void *MemoryManager::CpuAllocate(size_t bytes) | ||||
| { | ||||
|   total_host+=bytes; | ||||
|   void *ptr = (void *) Lookup(bytes,Cpu); | ||||
|   if ( ptr == (void *) NULL ) { | ||||
|     ptr = (void *) acceleratorAllocCpu(bytes); | ||||
|     total_host+=bytes; | ||||
|   } | ||||
| #ifdef GRID_MM_VERBOSE | ||||
|   std::cout <<"CpuAllocate "<<std::endl; | ||||
|   PrintBytes(); | ||||
| #endif | ||||
|   return ptr; | ||||
| } | ||||
| void  MemoryManager::CpuFree    (void *_ptr,size_t bytes) | ||||
| { | ||||
|   total_host-=bytes; | ||||
|   NotifyDeletion(_ptr); | ||||
|   void *__freeme = Insert(_ptr,bytes,Cpu); | ||||
|   if ( __freeme ) {  | ||||
|     acceleratorFreeCpu(__freeme); | ||||
|     total_host-=bytes; | ||||
|   } | ||||
| #ifdef GRID_MM_VERBOSE | ||||
|   std::cout <<"CpuFree "<<std::endl; | ||||
|   PrintBytes(); | ||||
| #endif | ||||
| } | ||||
| #endif | ||||
|  | ||||
| @@ -225,13 +183,13 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,int type) | ||||
| #ifdef ALLOCATION_CACHE | ||||
|   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); | ||||
|   int cache = type + small; | ||||
|   return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache],CacheBytes[cache]);   | ||||
|   return Insert(ptr,bytes,Entries[cache],Ncache[cache],Victim[cache]);   | ||||
| #else | ||||
|   return ptr; | ||||
| #endif | ||||
| } | ||||
|  | ||||
| void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim, uint64_t &cacheBytes)  | ||||
| void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim)  | ||||
| { | ||||
|   assert(ncache>0); | ||||
| #ifdef GRID_OMP | ||||
| @@ -255,7 +213,6 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | ||||
|  | ||||
|   if ( entries[v].valid ) { | ||||
|     ret = entries[v].address; | ||||
|     cacheBytes -= entries[v].bytes; | ||||
|     entries[v].valid = 0; | ||||
|     entries[v].address = NULL; | ||||
|     entries[v].bytes = 0; | ||||
| @@ -264,7 +221,6 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | ||||
|   entries[v].address=ptr; | ||||
|   entries[v].bytes  =bytes; | ||||
|   entries[v].valid  =1; | ||||
|   cacheBytes += bytes; | ||||
|  | ||||
|   return ret; | ||||
| } | ||||
| @@ -274,13 +230,13 @@ void *MemoryManager::Lookup(size_t bytes,int type) | ||||
| #ifdef ALLOCATION_CACHE | ||||
|   bool small = (bytes < GRID_ALLOC_SMALL_LIMIT); | ||||
|   int cache = type+small; | ||||
|   return Lookup(bytes,Entries[cache],Ncache[cache],CacheBytes[cache]); | ||||
|   return Lookup(bytes,Entries[cache],Ncache[cache]); | ||||
| #else | ||||
|   return NULL; | ||||
| #endif | ||||
| } | ||||
|  | ||||
| void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t & cacheBytes)  | ||||
| void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache)  | ||||
| { | ||||
|   assert(ncache>0); | ||||
| #ifdef GRID_OMP | ||||
| @@ -289,7 +245,6 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach | ||||
|   for(int e=0;e<ncache;e++){ | ||||
|     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { | ||||
|       entries[e].valid = 0; | ||||
|       cacheBytes -= entries[e].bytes; | ||||
|       return entries[e].address; | ||||
|     } | ||||
|   } | ||||
|   | ||||
| @@ -82,15 +82,14 @@ private: | ||||
|   static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; | ||||
|   static int Victim[NallocType]; | ||||
|   static int Ncache[NallocType]; | ||||
|   static uint64_t CacheBytes[NallocType]; | ||||
|  | ||||
|   ///////////////////////////////////////////////// | ||||
|   // Free pool | ||||
|   ///////////////////////////////////////////////// | ||||
|   static void *Insert(void *ptr,size_t bytes,int type) ; | ||||
|   static void *Lookup(size_t bytes,int type) ; | ||||
|   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim,uint64_t &cbytes) ; | ||||
|   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t &cbytes) ; | ||||
|   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; | ||||
|   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; | ||||
|  | ||||
|   static void PrintBytes(void); | ||||
|  public: | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
|  | ||||
| #warning "Using explicit device memory copies" | ||||
| NAMESPACE_BEGIN(Grid); | ||||
| //#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout); | ||||
| //define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout); | ||||
| #define dprintf(...) | ||||
|  | ||||
|  | ||||
| @@ -429,7 +429,6 @@ void  MemoryManager::NotifyDeletion(void *_ptr) | ||||
| } | ||||
| void  MemoryManager::Print(void) | ||||
| { | ||||
|   PrintBytes(); | ||||
|   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; | ||||
|   std::cout << GridLogDebug << "Memory Manager                             " << std::endl; | ||||
|   std::cout << GridLogDebug << "--------------------------------------------" << std::endl; | ||||
|   | ||||
| @@ -33,8 +33,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| bool Stencil_force_mpi = true; | ||||
|  | ||||
| /////////////////////////////////////////////////////////////// | ||||
| // Info that is setup once and indept of cartesian layout | ||||
| /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -35,8 +35,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| extern bool Stencil_force_mpi ; | ||||
|  | ||||
| class CartesianCommunicator : public SharedMemory { | ||||
|  | ||||
| public:     | ||||
|   | ||||
| @@ -370,7 +370,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | ||||
|   double off_node_bytes=0.0; | ||||
|   int tag; | ||||
|  | ||||
|   if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) { | ||||
|   if ( gfrom ==MPI_UNDEFINED) { | ||||
|     tag= dir+from*32; | ||||
|     ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); | ||||
|     assert(ierr==0); | ||||
| @@ -378,18 +378,12 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques | ||||
|     off_node_bytes+=bytes; | ||||
|   } | ||||
|  | ||||
|   if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) { | ||||
|   if ( gdest == MPI_UNDEFINED ) { | ||||
|     tag= dir+_processor*32; | ||||
|     ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); | ||||
|     assert(ierr==0); | ||||
|     list.push_back(xrq); | ||||
|     off_node_bytes+=bytes; | ||||
|   } else { | ||||
|     // TODO : make a OMP loop on CPU, call threaded bcopy | ||||
|     void *shm = (void *) this->ShmBufferTranslate(dest,recv); | ||||
|     assert(shm!=NULL); | ||||
|     acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); | ||||
|     acceleratorCopySynchronise(); // MPI prob slower | ||||
|   } | ||||
|  | ||||
|   if ( CommunicatorPolicy == CommunicatorPolicySequential ) { | ||||
|   | ||||
| @@ -513,16 +513,26 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Each MPI rank should allocate our own buffer | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||
|   auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device()); | ||||
|   auto zeContext= cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context()); | ||||
|   ze_device_mem_alloc_desc_t zeDesc = {}; | ||||
|   zeMemAllocDevice(zeContext,&zeDesc,bytes,2*1024*1024,zeDevice,&ShmCommBuf); | ||||
|   std::cout << WorldRank << header " SharedMemoryMPI.cc zeMemAllocDevice "<< bytes  | ||||
| 	      << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | ||||
| #else   | ||||
|   ShmCommBuf = acceleratorAllocDevice(bytes); | ||||
| #endif   | ||||
|   if (ShmCommBuf == (void *)NULL ) { | ||||
|     std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; | ||||
|     exit(EXIT_FAILURE);   | ||||
|   } | ||||
|   if ( WorldRank == 0 ){ | ||||
|   //  if ( WorldRank == 0 ){ | ||||
|   if ( 1 ){ | ||||
|     std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||
| 	      << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | ||||
|   } | ||||
|   SharedMemoryZero(ShmCommBuf,bytes); | ||||
|   //  SharedMemoryZero(ShmCommBuf,bytes); | ||||
|   std::cout<< "Setting up IPC"<<std::endl; | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Loop over ranks/gpu's on our node | ||||
| @@ -533,27 +543,21 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     ////////////////////////////////////////////////// | ||||
|     // If it is me, pass around the IPC access key | ||||
|     ////////////////////////////////////////////////// | ||||
|     void * thisBuf = ShmCommBuf; | ||||
|     if(!Stencil_force_mpi) { | ||||
| #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||
|     typedef struct { int fd; pid_t pid ; } clone_mem_t; | ||||
|  | ||||
|     auto zeDevice    = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device()); | ||||
|     auto zeContext   = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context()); | ||||
|        | ||||
|     ze_ipc_mem_handle_t ihandle; | ||||
|     clone_mem_t handle; | ||||
|  | ||||
|     ze_ipc_mem_handle_t handle; | ||||
|     if ( r==WorldShmRank ) {  | ||||
|       auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle); | ||||
|       auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&handle); | ||||
|       if ( err != ZE_RESULT_SUCCESS ) { | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||
| 	std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||
| 	exit(EXIT_FAILURE); | ||||
|       } else { | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||
| 	std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||
|       } | ||||
|       memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int)); | ||||
|       handle.pid = getpid(); | ||||
|       std::cerr<<"Allocated IpcHandle rank "<<r<<" (hex) "; | ||||
|       for(int c=0;c<ZE_MAX_IPC_HANDLE_SIZE;c++){ | ||||
| 	std::cerr<<std::hex<<(uint32_t)((uint8_t)handle.data[c])<<std::dec; | ||||
|       } | ||||
|       std::cerr<<std::endl; | ||||
|     } | ||||
| #endif | ||||
| #ifdef GRID_CUDA | ||||
| @@ -576,7 +580,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|       } | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     ////////////////////////////////////////////////// | ||||
|     // Share this IPC handle across the Shm Comm | ||||
|     ////////////////////////////////////////////////// | ||||
| @@ -592,31 +595,22 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     /////////////////////////////////////////////////////////////// | ||||
|     // If I am not the source, overwrite thisBuf with remote buffer | ||||
|     /////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     void * thisBuf = ShmCommBuf; | ||||
| #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||
|     if ( r!=WorldShmRank ) { | ||||
|       thisBuf = nullptr; | ||||
|       std::cout<<"mapping seeking remote pid/fd " | ||||
| 	       <<handle.pid<<"/" | ||||
| 	       <<handle.fd<<std::endl; | ||||
|  | ||||
|       int pidfd = syscall(SYS_pidfd_open,handle.pid,0); | ||||
|       std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n"; | ||||
|       //      int myfd  = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0); | ||||
|       int myfd  = syscall(438,pidfd,handle.fd,0); | ||||
|  | ||||
|       std::cout<<"Using IpcHandle myfd "<<myfd<<"\n"; | ||||
|        | ||||
|       memcpy((void *)&ihandle,(void *)&myfd,sizeof(int)); | ||||
|  | ||||
|       auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf); | ||||
|       std::cerr<<"Using IpcHandle rank "<<r<<" "; | ||||
|       for(int c=0;c<ZE_MAX_IPC_HANDLE_SIZE;c++){ | ||||
| 	std::cerr<<std::hex<<(uint32_t)((uint8_t)handle.data[c])<<std::dec; | ||||
|       } | ||||
|       std::cerr<<std::endl; | ||||
|       auto err = zeMemOpenIpcHandle(zeContext,zeDevice,handle,0,&thisBuf); | ||||
|       if ( err != ZE_RESULT_SUCCESS ) { | ||||
| 	std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl; | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;  | ||||
| 	std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl; | ||||
| 	std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;  | ||||
| 	exit(EXIT_FAILURE); | ||||
|       } else { | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl; | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle pointer is "<<std::hex<<thisBuf<<std::dec<<std::endl; | ||||
| 	std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||
|       } | ||||
|       assert(thisBuf!=nullptr); | ||||
|     } | ||||
| @@ -642,7 +636,6 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     /////////////////////////////////////////////////////////////// | ||||
|     // Save a copy of the device buffers | ||||
|     /////////////////////////////////////////////////////////////// | ||||
|     } | ||||
|     WorldShmCommBufs[r] = thisBuf; | ||||
| #else | ||||
|     WorldShmCommBufs[r] = ShmCommBuf; | ||||
|   | ||||
| @@ -46,4 +46,3 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/lattice/Lattice_unary.h> | ||||
| #include <Grid/lattice/Lattice_transfer.h> | ||||
| #include <Grid/lattice/Lattice_basis.h> | ||||
| #include <Grid/lattice/Lattice_crc.h> | ||||
|   | ||||
| @@ -225,7 +225,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> & | ||||
|   autoView( x_v , x, AcceleratorRead); | ||||
|   autoView( y_v , y, AcceleratorRead); | ||||
|   accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ | ||||
|     auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]); | ||||
|     auto tmp = a*x_v(ss)+y_v(ss); | ||||
|     coalescedWrite(ret_v[ss],tmp); | ||||
|   }); | ||||
| } | ||||
|   | ||||
| @@ -125,7 +125,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | ||||
|  | ||||
| 	for(int k=k0; k<k1; ++k){ | ||||
| 	  auto tmp = coalescedRead(Bp[ss*nrot+j]); | ||||
| 	  coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss])); | ||||
| 	  coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss])); | ||||
| 	} | ||||
|       }); | ||||
|  | ||||
| @@ -134,7 +134,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | ||||
| 	int jj  =j0+j; | ||||
| 	int ss =sj/nrot; | ||||
| 	int sss=ss+s; | ||||
| 	coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j])); | ||||
| 	coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); | ||||
|       }); | ||||
|   } | ||||
| #endif | ||||
|   | ||||
| @@ -1,55 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/lattice/Lattice_crc.h | ||||
|  | ||||
|     Copyright (C) 2021 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1) | ||||
| { | ||||
|   auto ff = localNorm2(f); | ||||
|   if ( mu==-1 ) mu = f.Grid()->Nd()-1; | ||||
|   typedef typename vobj::tensor_reduced normtype; | ||||
|   typedef typename normtype::scalar_object scalar; | ||||
|   std::vector<scalar> sff; | ||||
|   sliceSum(ff,sff,mu); | ||||
|   for(int t=0;t<sff.size();t++){ | ||||
|     std::cout << s<<" "<<t<<" "<<sff[t]<<std::endl; | ||||
|   } | ||||
| } | ||||
|  | ||||
| template<class vobj> uint32_t crc(Lattice<vobj> & buf) | ||||
| { | ||||
|   autoView( buf_v , buf, CpuRead); | ||||
|   return ::crc32(0L,(unsigned char *)&buf_v[0],(size_t)sizeof(vobj)*buf.oSites()); | ||||
| } | ||||
|  | ||||
| #define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
|  | ||||
| @@ -361,7 +361,6 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | ||||
|   // But easily avoided by using double precision fields | ||||
|   /////////////////////////////////////////////////////// | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   typedef typename vobj::scalar_object::scalar_type scalar_type; | ||||
|   GridBase  *grid = Data.Grid(); | ||||
|   assert(grid!=NULL); | ||||
|  | ||||
| @@ -420,19 +419,20 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector< | ||||
|   } | ||||
|    | ||||
|   // sum over nodes. | ||||
|   sobj gsum; | ||||
|   for(int t=0;t<fd;t++){ | ||||
|     int pt = t/ld; // processor plane | ||||
|     int lt = t%ld; | ||||
|     if ( pt == grid->_processor_coor[orthogdim] ) { | ||||
|       result[t]=lsSum[lt]; | ||||
|       gsum=lsSum[lt]; | ||||
|     } else { | ||||
|       result[t]=Zero(); | ||||
|       gsum=Zero(); | ||||
|     } | ||||
|  | ||||
|     grid->GlobalSum(gsum); | ||||
|  | ||||
|     result[t]=gsum; | ||||
|   } | ||||
|   scalar_type * ptr = (scalar_type *) &result[0]; | ||||
|   int words = fd*sizeof(sobj)/sizeof(scalar_type); | ||||
|   grid->GlobalSumVector(ptr, words); | ||||
| } | ||||
|  | ||||
| template<class vobj> | ||||
|   | ||||
| @@ -32,9 +32,8 @@ | ||||
| #include <random> | ||||
|  | ||||
| #ifdef RNG_SITMO | ||||
| #include <Grid/random/sitmo_prng_engine.hpp> | ||||
| #include <Grid/sitmo_rng/sitmo_prng_engine.hpp> | ||||
| #endif  | ||||
| #include <Grid/random/gaussian.h> | ||||
|  | ||||
| #if defined(RNG_SITMO) | ||||
| #define RNG_FAST_DISCARD | ||||
| @@ -143,7 +142,7 @@ public: | ||||
|  | ||||
|   std::vector<RngEngine>                             _generators; | ||||
|   std::vector<std::uniform_real_distribution<RealD> > _uniform; | ||||
|   std::vector<Grid::gaussian_distribution<RealD> >    _gaussian; | ||||
|   std::vector<std::normal_distribution<RealD> >       _gaussian; | ||||
|   std::vector<std::discrete_distribution<int32_t> >   _bernoulli; | ||||
|   std::vector<std::uniform_int_distribution<uint32_t> > _uid; | ||||
|  | ||||
| @@ -244,7 +243,7 @@ public: | ||||
|   GridSerialRNG() : GridRNGbase() { | ||||
|     _generators.resize(1); | ||||
|     _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1}); | ||||
|     _gaussian.resize(1,gaussian_distribution<RealD>(0.0,1.0) ); | ||||
|     _gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) ); | ||||
|     _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1}); | ||||
|     _uid.resize(1,std::uniform_int_distribution<uint32_t>() ); | ||||
|   } | ||||
| @@ -358,7 +357,7 @@ public: | ||||
|  | ||||
|     _generators.resize(_vol); | ||||
|     _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); | ||||
|     _gaussian.resize(_vol,gaussian_distribution<RealD>(0.0,1.0) ); | ||||
|     _gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) ); | ||||
|     _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1}); | ||||
|     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() ); | ||||
|   } | ||||
|   | ||||
| @@ -364,22 +364,16 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | ||||
|   autoView( coarseData_ , coarseData, AcceleratorWrite); | ||||
|   autoView( fineData_   , fineData, AcceleratorRead); | ||||
|  | ||||
|   auto coarseData_p = &coarseData_[0]; | ||||
|   auto fineData_p = &fineData_[0]; | ||||
|    | ||||
|   Coordinate fine_rdimensions = fine->_rdimensions; | ||||
|   Coordinate coarse_rdimensions = coarse->_rdimensions; | ||||
|  | ||||
|   vobj zz = Zero(); | ||||
|    | ||||
|   accelerator_for(sc,coarse->oSites(),1,{ | ||||
|  | ||||
|       // One thread per sub block | ||||
|       Coordinate coor_c(_ndimension); | ||||
|       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate | ||||
|       coarseData_[sc]=Zero(); | ||||
|  | ||||
|       vobj cd = zz; | ||||
|        | ||||
|       for(int sb=0;sb<blockVol;sb++){ | ||||
|  | ||||
| 	int sf; | ||||
| @@ -389,11 +383,9 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | ||||
| 	for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; | ||||
| 	Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); | ||||
|  | ||||
| 	cd=cd+fineData_p[sf]; | ||||
| 	coarseData_[sc]=coarseData_[sc]+fineData_[sf]; | ||||
|       } | ||||
|  | ||||
|       coarseData_p[sc] = cd; | ||||
|  | ||||
|     }); | ||||
|   return; | ||||
| } | ||||
| @@ -785,7 +777,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int | ||||
|  | ||||
|  | ||||
| template<class vobj> | ||||
| void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
| void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
| { | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|  | ||||
| @@ -1010,95 +1002,53 @@ vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out) | ||||
|   }); | ||||
| } | ||||
|  | ||||
| //The workspace for a precision change operation allowing for the reuse of the mapping to save time on subsequent calls | ||||
| class precisionChangeWorkspace{ | ||||
|   std::pair<Integer,Integer>* fmap_device; //device pointer | ||||
| public: | ||||
|   precisionChangeWorkspace(GridBase *out_grid, GridBase *in_grid){ | ||||
|     //Build a map between the sites and lanes of the output field and the input field as we cannot use the Grids on the device | ||||
|     assert(out_grid->Nd() == in_grid->Nd()); | ||||
|     for(int d=0;d<out_grid->Nd();d++){ | ||||
|       assert(out_grid->FullDimensions()[d] == in_grid->FullDimensions()[d]); | ||||
|     } | ||||
|     int Nsimd_out = out_grid->Nsimd(); | ||||
|  | ||||
|     std::vector<Coordinate> out_icorrs(out_grid->Nsimd()); //reuse these | ||||
|     for(int lane=0; lane < out_grid->Nsimd(); lane++) | ||||
|       out_grid->iCoorFromIindex(out_icorrs[lane], lane); | ||||
|    | ||||
|     std::vector<std::pair<Integer,Integer> > fmap_host(out_grid->lSites()); //lsites = osites*Nsimd | ||||
|     thread_for(out_oidx,out_grid->oSites(),{ | ||||
| 	Coordinate out_ocorr;  | ||||
| 	out_grid->oCoorFromOindex(out_ocorr, out_oidx); | ||||
|        | ||||
| 	Coordinate lcorr; //the local coordinate (common to both in and out as full coordinate) | ||||
| 	for(int out_lane=0; out_lane < Nsimd_out; out_lane++){ | ||||
| 	  out_grid->InOutCoorToLocalCoor(out_ocorr, out_icorrs[out_lane], lcorr); | ||||
| 	 | ||||
| 	  //int in_oidx = in_grid->oIndex(lcorr), in_lane = in_grid->iIndex(lcorr); | ||||
| 	  //Note oIndex and OcorrFromOindex (and same for iIndex) are not inverse for checkerboarded lattice, the former coordinates being defined on the full lattice and the latter on the reduced lattice | ||||
| 	  //Until this is fixed we need to circumvent the problem locally. Here I will use the coordinates defined on the reduced lattice for simplicity | ||||
| 	  int in_oidx = 0, in_lane = 0; | ||||
| 	  for(int d=0;d<in_grid->_ndimension;d++){ | ||||
| 	    in_oidx += in_grid->_ostride[d] * ( lcorr[d] % in_grid->_rdimensions[d] ); | ||||
| 	    in_lane += in_grid->_istride[d] * ( lcorr[d] / in_grid->_rdimensions[d] ); | ||||
| 	  } | ||||
| 	  fmap_host[out_lane + Nsimd_out*out_oidx] = std::pair<Integer,Integer>( in_oidx, in_lane ); | ||||
| 	} | ||||
|       }); | ||||
|  | ||||
|     //Copy the map to the device (if we had a way to tell if an accelerator is in use we could avoid this copy for CPU-only machines) | ||||
|     size_t fmap_bytes = out_grid->lSites() * sizeof(std::pair<Integer,Integer>); | ||||
|     fmap_device = (std::pair<Integer,Integer>*)acceleratorAllocDevice(fmap_bytes); | ||||
|     acceleratorCopyToDevice(fmap_host.data(), fmap_device, fmap_bytes);  | ||||
|   } | ||||
|  | ||||
|   //Prevent moving or copying | ||||
|   precisionChangeWorkspace(const precisionChangeWorkspace &r) = delete; | ||||
|   precisionChangeWorkspace(precisionChangeWorkspace &&r) = delete; | ||||
|   precisionChangeWorkspace &operator=(const precisionChangeWorkspace &r) = delete; | ||||
|   precisionChangeWorkspace &operator=(precisionChangeWorkspace &&r) = delete; | ||||
|    | ||||
|   std::pair<Integer,Integer> const* getMap() const{ return fmap_device; } | ||||
|  | ||||
|   ~precisionChangeWorkspace(){ | ||||
|     acceleratorFreeDevice(fmap_device); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| //Convert a lattice of one precision to another. The input workspace contains the mapping data. | ||||
| template<class VobjOut, class VobjIn> | ||||
| void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in, const precisionChangeWorkspace &workspace){ | ||||
|   static_assert( std::is_same<typename VobjOut::DoublePrecision, typename VobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same | ||||
|  | ||||
|   out.Checkerboard() = in.Checkerboard(); | ||||
|   constexpr int Nsimd_out = VobjOut::Nsimd(); | ||||
|  | ||||
|   std::pair<Integer,Integer> const* fmap_device = workspace.getMap(); | ||||
|  | ||||
|   //Do the copy/precision change | ||||
|   autoView( out_v , out, AcceleratorWrite); | ||||
|   autoView( in_v , in, AcceleratorRead); | ||||
|  | ||||
|   accelerator_for(out_oidx, out.Grid()->oSites(), 1,{ | ||||
|       std::pair<Integer,Integer> const* fmap_osite = fmap_device + out_oidx*Nsimd_out; | ||||
|       for(int out_lane=0; out_lane < Nsimd_out; out_lane++){       | ||||
| 	int in_oidx = fmap_osite[out_lane].first; | ||||
| 	int in_lane = fmap_osite[out_lane].second; | ||||
| 	copyLane(out_v[out_oidx], out_lane, in_v[in_oidx], in_lane); | ||||
|       } | ||||
|     }); | ||||
| } | ||||
|  | ||||
| //Convert a Lattice from one precision to another | ||||
| //Generate the workspace in place; if multiple calls with the same mapping are performed, consider pregenerating the workspace and reusing | ||||
| template<class VobjOut, class VobjIn> | ||||
| void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | ||||
|   precisionChangeWorkspace workspace(out.Grid(), in.Grid()); | ||||
|   precisionChange(out, in, workspace); | ||||
| } | ||||
| void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in) | ||||
| { | ||||
|   assert(out.Grid()->Nd() == in.Grid()->Nd()); | ||||
|   for(int d=0;d<out.Grid()->Nd();d++){ | ||||
|     assert(out.Grid()->FullDimensions()[d] == in.Grid()->FullDimensions()[d]); | ||||
|   } | ||||
|   out.Checkerboard() = in.Checkerboard(); | ||||
|   GridBase *in_grid=in.Grid(); | ||||
|   GridBase *out_grid = out.Grid(); | ||||
|  | ||||
|   typedef typename VobjOut::scalar_object SobjOut; | ||||
|   typedef typename VobjIn::scalar_object SobjIn; | ||||
|  | ||||
|   int ndim = out.Grid()->Nd(); | ||||
|   int out_nsimd = out_grid->Nsimd(); | ||||
|      | ||||
|   std::vector<Coordinate > out_icoor(out_nsimd); | ||||
|        | ||||
|   for(int lane=0; lane < out_nsimd; lane++){ | ||||
|     out_icoor[lane].resize(ndim); | ||||
|     out_grid->iCoorFromIindex(out_icoor[lane], lane); | ||||
|   } | ||||
|          | ||||
|   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); | ||||
|   unvectorizeToLexOrdArray(in_slex_conv, in); | ||||
|      | ||||
|   autoView( out_v , out, CpuWrite); | ||||
|   thread_for(out_oidx,out_grid->oSites(),{ | ||||
|     Coordinate out_ocoor(ndim); | ||||
|     out_grid->oCoorFromOindex(out_ocoor, out_oidx); | ||||
|  | ||||
|     ExtractPointerArray<SobjOut> ptrs(out_nsimd);       | ||||
|  | ||||
|     Coordinate lcoor(out_grid->Nd()); | ||||
|        | ||||
|     for(int lane=0; lane < out_nsimd; lane++){ | ||||
|       for(int mu=0;mu<ndim;mu++) | ||||
| 	lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu]; | ||||
| 	 | ||||
|       int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions); | ||||
|       ptrs[lane] = &in_slex_conv[llex]; | ||||
|     } | ||||
|     merge(out_v[out_oidx], ptrs, 0); | ||||
|   }); | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // Communicate between grids | ||||
|   | ||||
| @@ -69,7 +69,6 @@ GridLogger GridLogDebug  (1, "Debug", GridLogColours, "PURPLE"); | ||||
| GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); | ||||
| GridLogger GridLogIterative  (1, "Iterative", GridLogColours, "BLUE"); | ||||
| GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); | ||||
| GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE"); | ||||
|  | ||||
| void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||
|   GridLogError.Active(0); | ||||
| @@ -80,7 +79,6 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||
|   GridLogPerformance.Active(0); | ||||
|   GridLogIntegrator.Active(1); | ||||
|   GridLogColours.Active(0); | ||||
|   GridLogHMC.Active(1); | ||||
|  | ||||
|   for (int i = 0; i < logstreams.size(); i++) { | ||||
|     if (logstreams[i] == std::string("Error"))       GridLogError.Active(1); | ||||
| @@ -89,8 +87,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) { | ||||
|     if (logstreams[i] == std::string("Iterative"))   GridLogIterative.Active(1); | ||||
|     if (logstreams[i] == std::string("Debug"))       GridLogDebug.Active(1); | ||||
|     if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1); | ||||
|     if (logstreams[i] == std::string("NoIntegrator"))  GridLogIntegrator.Active(0); | ||||
|     if (logstreams[i] == std::string("NoHMC"))         GridLogHMC.Active(0); | ||||
|     if (logstreams[i] == std::string("Integrator"))  GridLogIntegrator.Active(1); | ||||
|     if (logstreams[i] == std::string("Colours"))     GridLogColours.Active(1); | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -182,7 +182,6 @@ extern GridLogger GridLogDebug  ; | ||||
| extern GridLogger GridLogPerformance; | ||||
| extern GridLogger GridLogIterative  ; | ||||
| extern GridLogger GridLogIntegrator  ; | ||||
| extern GridLogger GridLogHMC; | ||||
| extern Colours    GridLogColours; | ||||
|  | ||||
| std::string demangle(const char* name) ; | ||||
|   | ||||
| @@ -39,10 +39,8 @@ using namespace Grid; | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| class NerscIO : public BinaryIO {  | ||||
| public: | ||||
|   typedef Lattice<vLorentzColourMatrixD> GaugeField; | ||||
|  | ||||
|   // Enable/disable exiting if the plaquette in the header does not match the value computed (default true) | ||||
|   static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; } | ||||
|   typedef Lattice<vLorentzColourMatrixD> GaugeField; | ||||
|  | ||||
|   static inline void truncate(std::string file){ | ||||
|     std::ofstream fout(file,std::ios::out); | ||||
| @@ -200,7 +198,7 @@ public: | ||||
|       std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl; | ||||
|       exit(0); | ||||
|     } | ||||
|     if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); | ||||
|     assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-1 ); | ||||
|     assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); | ||||
|     assert(nersc_csum == header.checksum ); | ||||
|        | ||||
|   | ||||
| @@ -63,7 +63,6 @@ static constexpr int Ngp=2; // gparity index range | ||||
| #define ColourIndex  (2) | ||||
| #define SpinIndex    (1) | ||||
| #define LorentzIndex (0) | ||||
| #define GparityFlavourIndex (0) | ||||
|  | ||||
| // Also should make these a named enum type | ||||
| static constexpr int DaggerNo=0; | ||||
| @@ -88,8 +87,6 @@ template<typename T> struct isCoarsened { | ||||
| template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ; | ||||
| template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ; | ||||
|  | ||||
| const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom! | ||||
|  | ||||
| // ChrisK very keen to add extra space for Gparity doubling. | ||||
| // | ||||
| // Also add domain wall index, in a way where Wilson operator  | ||||
| @@ -104,7 +101,6 @@ template<typename vtype> using iSpinMatrix                = iScalar<iMatrix<iSca | ||||
| template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ; | ||||
| template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >; | ||||
| template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ; | ||||
| template<typename vtype> using iLorentzVector             = iVector<iScalar<iScalar<vtype> >, Nd > ; | ||||
| template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ; | ||||
| template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >; | ||||
| template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >; | ||||
| @@ -114,10 +110,8 @@ template<typename vtype> using iHalfSpinColourVector      = iScalar<iVector<iVec | ||||
|     template<typename vtype> using iSpinColourSpinColourMatrix  = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >; | ||||
|  | ||||
|  | ||||
| template<typename vtype> using iGparityFlavourVector                = iVector<iScalar<iScalar<vtype> >, Ngp>; | ||||
| template<typename vtype> using iGparitySpinColourVector       = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >; | ||||
| template<typename vtype> using iGparityHalfSpinColourVector   = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >; | ||||
| template<typename vtype> using iGparityFlavourMatrix = iMatrix<iScalar<iScalar<vtype> >, Ngp>; | ||||
|  | ||||
| // Spin matrix | ||||
| typedef iSpinMatrix<Complex  >          SpinMatrix; | ||||
| @@ -164,16 +158,7 @@ typedef iSpinColourSpinColourMatrix<vComplex >    vSpinColourSpinColourMatrix; | ||||
| typedef iSpinColourSpinColourMatrix<vComplexF>    vSpinColourSpinColourMatrixF; | ||||
| typedef iSpinColourSpinColourMatrix<vComplexD>    vSpinColourSpinColourMatrixD; | ||||
|  | ||||
| // LorentzVector | ||||
| typedef iLorentzVector<Complex  > LorentzVector; | ||||
| typedef iLorentzVector<ComplexF > LorentzVectorF; | ||||
| typedef iLorentzVector<ComplexD > LorentzVectorD; | ||||
|  | ||||
| typedef iLorentzVector<vComplex > vLorentzVector; | ||||
| typedef iLorentzVector<vComplexF> vLorentzVectorF; | ||||
| typedef iLorentzVector<vComplexD> vLorentzVectorD; | ||||
|  | ||||
| // LorentzColourMatrix | ||||
| // LorentzColour | ||||
| typedef iLorentzColourMatrix<Complex  > LorentzColourMatrix; | ||||
| typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF; | ||||
| typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD; | ||||
| @@ -191,16 +176,6 @@ typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix; | ||||
| typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF; | ||||
| typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD; | ||||
|  | ||||
| //G-parity flavour matrix | ||||
| typedef iGparityFlavourMatrix<Complex> GparityFlavourMatrix; | ||||
| typedef iGparityFlavourMatrix<ComplexF> GparityFlavourMatrixF; | ||||
| typedef iGparityFlavourMatrix<ComplexD> GparityFlavourMatrixD; | ||||
|  | ||||
| typedef iGparityFlavourMatrix<vComplex> vGparityFlavourMatrix; | ||||
| typedef iGparityFlavourMatrix<vComplexF> vGparityFlavourMatrixF; | ||||
| typedef iGparityFlavourMatrix<vComplexD> vGparityFlavourMatrixD; | ||||
|  | ||||
|  | ||||
| // Spin vector | ||||
| typedef iSpinVector<Complex >           SpinVector; | ||||
| typedef iSpinVector<ComplexF>           SpinVectorF; | ||||
| @@ -245,16 +220,6 @@ typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD; | ||||
| typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector; | ||||
| typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF; | ||||
| typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD; | ||||
|  | ||||
| //G-parity flavour vector | ||||
| typedef iGparityFlavourVector<Complex >         GparityFlavourVector; | ||||
| typedef iGparityFlavourVector<ComplexF>         GparityFlavourVectorF; | ||||
| typedef iGparityFlavourVector<ComplexD>         GparityFlavourVectorD; | ||||
|  | ||||
| typedef iGparityFlavourVector<vComplex >         vGparityFlavourVector; | ||||
| typedef iGparityFlavourVector<vComplexF>         vGparityFlavourVectorF; | ||||
| typedef iGparityFlavourVector<vComplexD>         vGparityFlavourVectorD; | ||||
|  | ||||
|      | ||||
| // singlets | ||||
| typedef iSinglet<Complex >         TComplex;     // FIXME This is painful. Tensor singlet complex type. | ||||
| @@ -298,10 +263,6 @@ typedef Lattice<vLorentzColourMatrix>  LatticeLorentzColourMatrix; | ||||
| typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF; | ||||
| typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD; | ||||
|  | ||||
| typedef Lattice<vLorentzVector>  LatticeLorentzVector; | ||||
| typedef Lattice<vLorentzVectorF> LatticeLorentzVectorF; | ||||
| typedef Lattice<vLorentzVectorD> LatticeLorentzVectorD; | ||||
|  | ||||
| // DoubleStored gauge field | ||||
| typedef Lattice<vDoubleStoredColourMatrix>  LatticeDoubleStoredColourMatrix; | ||||
| typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF; | ||||
|   | ||||
| @@ -30,7 +30,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #pragma once | ||||
| #ifndef GRID_QCD_ACTION_H | ||||
| #define GRID_QCD_ACTION_H | ||||
|  | ||||
| //////////////////////////////////////////// | ||||
| // Abstract base interface | ||||
| @@ -50,4 +51,4 @@ NAMESPACE_CHECK(Fermion); | ||||
| #include <Grid/qcd/action/pseudofermion/PseudoFermion.h> | ||||
| NAMESPACE_CHECK(PseudoFermion); | ||||
|  | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -40,29 +40,6 @@ class Action | ||||
|  | ||||
| public: | ||||
|   bool is_smeared = false; | ||||
|   RealD deriv_norm_sum; | ||||
|   RealD deriv_max_sum; | ||||
|   int   deriv_num; | ||||
|   RealD deriv_us; | ||||
|   RealD S_us; | ||||
|   RealD refresh_us; | ||||
|   void  reset_timer(void)        { | ||||
|     deriv_us = S_us = refresh_us = 0.0; | ||||
|     deriv_num=0; | ||||
|     deriv_norm_sum = deriv_max_sum=0.0; | ||||
|   } | ||||
|   void  deriv_log(RealD nrm, RealD max) { deriv_max_sum+=max; deriv_norm_sum+=nrm; deriv_num++;} | ||||
|   RealD deriv_max_average(void)         { return deriv_max_sum/deriv_num; }; | ||||
|   RealD deriv_norm_average(void)        { return deriv_norm_sum/deriv_num; }; | ||||
|   RealD deriv_timer(void)        { return deriv_us; }; | ||||
|   RealD S_timer(void)            { return deriv_us; }; | ||||
|   RealD refresh_timer(void)      { return deriv_us; }; | ||||
|   void deriv_timer_start(void)   { deriv_us-=usecond(); } | ||||
|   void deriv_timer_stop(void)    { deriv_us+=usecond(); } | ||||
|   void refresh_timer_start(void) { refresh_us-=usecond(); } | ||||
|   void refresh_timer_stop(void)  { refresh_us+=usecond(); } | ||||
|   void S_timer_start(void)       { S_us-=usecond(); } | ||||
|   void S_timer_stop(void)        { S_us+=usecond(); } | ||||
|   // Heatbath? | ||||
|   virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions | ||||
|   virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action | ||||
|   | ||||
| @@ -58,8 +58,6 @@ NAMESPACE_CHECK(Scalar); | ||||
| //////////////////////////////////////////// | ||||
| // Utility functions | ||||
| //////////////////////////////////////////// | ||||
| #include <Grid/qcd/action/domains/Domains.h> | ||||
|  | ||||
| #include <Grid/qcd/utils/Metric.h> | ||||
| NAMESPACE_CHECK(Metric); | ||||
| #include <Grid/qcd/utils/CovariantLaplacian.h> | ||||
|   | ||||
| @@ -36,34 +36,28 @@ NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| // These can move into a params header and be given MacroMagic serialisation | ||||
| struct GparityWilsonImplParams { | ||||
|   Coordinate twists; //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  | ||||
|                      //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs | ||||
|   bool locally_periodic; | ||||
|   GparityWilsonImplParams() : twists(Nd, 0), locally_periodic(false) {}; | ||||
|   Coordinate twists; | ||||
|   GparityWilsonImplParams() : twists(Nd, 0) {}; | ||||
| }; | ||||
|    | ||||
| struct WilsonImplParams { | ||||
|   bool overlapCommsCompute; | ||||
|   bool locally_periodic; | ||||
|   AcceleratorVector<Real,Nd> twist_n_2pi_L; | ||||
|   AcceleratorVector<Complex,Nd> boundary_phases; | ||||
|   WilsonImplParams()  { | ||||
|     boundary_phases.resize(Nd, 1.0); | ||||
|       twist_n_2pi_L.resize(Nd, 0.0); | ||||
|       locally_periodic = false; | ||||
|   }; | ||||
|   WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) { | ||||
|     twist_n_2pi_L.resize(Nd, 0.0); | ||||
|     locally_periodic = false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| struct StaggeredImplParams { | ||||
|   bool locally_periodic; | ||||
|   StaggeredImplParams() : locally_periodic(false) {}; | ||||
|   StaggeredImplParams()  {}; | ||||
| }; | ||||
|    | ||||
| struct OneFlavourRationalParams : Serializable { | ||||
|   struct OneFlavourRationalParams : Serializable { | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,  | ||||
| 				    RealD, lo,  | ||||
| 				    RealD, hi,  | ||||
| @@ -91,50 +85,6 @@ struct OneFlavourRationalParams : Serializable { | ||||
|         precision(_precision), | ||||
|         BoundsCheckFreq(_BoundsCheckFreq){}; | ||||
|   }; | ||||
|  | ||||
|  | ||||
|   /*Action parameters for the generalized rational action | ||||
|     The approximation is for (M^dag M)^{1/inv_pow} | ||||
|     where inv_pow is the denominator of the fractional power. | ||||
|     Default inv_pow=2 for square root, making this equivalent to  | ||||
|     the OneFlavourRational action | ||||
|   */ | ||||
|     struct RationalActionParams : Serializable { | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(RationalActionParams,  | ||||
| 				    int, inv_pow,  | ||||
| 				    RealD, lo, //low eigenvalue bound of rational approx | ||||
| 				    RealD, hi, //high eigenvalue bound of rational approx | ||||
| 				    int,   MaxIter,  //maximum iterations in msCG | ||||
| 				    RealD, action_tolerance,  //msCG tolerance in action evaluation | ||||
| 				    int,   action_degree, //rational approx tolerance in action evaluation | ||||
| 				    RealD, md_tolerance,  //msCG tolerance in MD integration | ||||
| 				    int,   md_degree, //rational approx tolerance in MD integration | ||||
| 				    int,   precision, //precision of floating point arithmetic | ||||
| 				    int,   BoundsCheckFreq); //frequency the approximation is tested (with Metropolis degree/tolerance); 0 disables the check | ||||
|   // constructor  | ||||
|   RationalActionParams(int _inv_pow = 2, | ||||
| 		       RealD _lo      = 0.0,  | ||||
| 		       RealD _hi      = 1.0,  | ||||
| 		       int _maxit     = 1000, | ||||
| 		       RealD _action_tolerance      = 1.0e-8,  | ||||
| 		       int _action_degree    = 10, | ||||
| 		       RealD _md_tolerance      = 1.0e-8,  | ||||
| 		       int _md_degree    = 10, | ||||
| 		       int _precision = 64, | ||||
| 		       int _BoundsCheckFreq=20) | ||||
|     : inv_pow(_inv_pow),  | ||||
|       lo(_lo), | ||||
|       hi(_hi), | ||||
|       MaxIter(_maxit), | ||||
|       action_tolerance(_action_tolerance), | ||||
|       action_degree(_action_degree), | ||||
|       md_tolerance(_md_tolerance), | ||||
|       md_degree(_md_degree), | ||||
|       precision(_precision), | ||||
|       BoundsCheckFreq(_BoundsCheckFreq){}; | ||||
|   }; | ||||
|  | ||||
|  | ||||
|    | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
|   | ||||
| @@ -1,98 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/qcd/action/momentum/DirichletFilter.h | ||||
|  | ||||
| Copyright (C) 2021 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| 			   /*  END LEGAL */ | ||||
|  | ||||
| //////////////////////////////////////////////////// | ||||
| // Dirichlet filter with sub-block size B[mu] | ||||
| //////////////////////////////////////////////////// | ||||
| #pragma once  | ||||
|  | ||||
| #include <Grid/qcd/action/domains/DomainDecomposition.h> | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
|  | ||||
| template<typename MomentaField> | ||||
| struct DirichletFilter: public MomentumFilterBase<MomentaField> | ||||
| { | ||||
|   Coordinate Block; | ||||
|    | ||||
|   DirichletFilter(const Coordinate &_Block): Block(_Block) {} | ||||
|  | ||||
|   // Edge detect using domain projectors | ||||
|   void applyFilter (MomentaField &U) const override | ||||
|   { | ||||
|     DomainDecomposition Domains(Block); | ||||
|     GridBase *grid = U.Grid(); | ||||
|     LatticeInteger  coor(grid); | ||||
|     LatticeInteger  face(grid); | ||||
|     LatticeInteger  one(grid);   one = 1; | ||||
|     LatticeInteger  zero(grid); zero = 0; | ||||
|     LatticeInteger  omega(grid); | ||||
|     LatticeInteger  omegabar(grid); | ||||
|     LatticeInteger  tmp(grid); | ||||
|  | ||||
|     omega=one;    Domains.ProjectDomain(omega,0); | ||||
|     omegabar=one; Domains.ProjectDomain(omegabar,1); | ||||
|      | ||||
|     LatticeInteger nface(grid); nface=Zero(); | ||||
|      | ||||
|     MomentaField projected(grid); projected=Zero(); | ||||
|     typedef decltype(PeekIndex<LorentzIndex>(U,0)) MomentaLinkField; | ||||
|     MomentaLinkField  Umu(grid); | ||||
|     MomentaLinkField   zz(grid); zz=Zero(); | ||||
|  | ||||
|     int dims = grid->Nd(); | ||||
|     Coordinate Global=grid->GlobalDimensions(); | ||||
|     assert(dims==Nd); | ||||
|  | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|  | ||||
|       if ( Block[mu]!=0 ) { | ||||
|  | ||||
| 	Umu = PeekIndex<LorentzIndex>(U,mu); | ||||
|  | ||||
| 	// Upper face  | ||||
|  	tmp = Cshift(omegabar,mu,1); | ||||
| 	tmp = tmp + omega; | ||||
| 	face = where(tmp == Integer(2),one,zero ); | ||||
|  | ||||
|  	tmp = Cshift(omega,mu,1); | ||||
| 	tmp = tmp + omegabar; | ||||
| 	face = where(tmp == Integer(2),one,face ); | ||||
|  | ||||
| 	Umu = where(face,zz,Umu); | ||||
|  | ||||
| 	PokeIndex<LorentzIndex>(U, Umu, mu); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|    | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
| @@ -1,187 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/qcd/action/domains/DomainDecomposition.h | ||||
|  | ||||
| Copyright (C) 2021 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| 			   /*  END LEGAL */ | ||||
|  | ||||
| //////////////////////////////////////////////////// | ||||
| // Dirichlet filter with sub-block size B[mu] | ||||
| //////////////////////////////////////////////////// | ||||
| #pragma once  | ||||
|  | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
|  | ||||
| struct DomainDecomposition | ||||
| { | ||||
|   Coordinate Block; | ||||
|   static constexpr RealD factor = 0.6; | ||||
|  | ||||
|   DomainDecomposition(const Coordinate &_Block): Block(_Block){ assert(Block.size()==Nd);}; | ||||
|    | ||||
|   template<class Field> | ||||
|   void ProjectDomain(Field &f,Integer domain) | ||||
|   { | ||||
|     GridBase *grid = f.Grid(); | ||||
|     int dims = grid->Nd(); | ||||
|     int isDWF= (dims==Nd+1); | ||||
|     assert((dims==Nd)||(dims==Nd+1)); | ||||
|  | ||||
|     Field   zz(grid);  zz = Zero(); | ||||
|     LatticeInteger coor(grid); | ||||
|     LatticeInteger domaincoor(grid); | ||||
|     LatticeInteger mask(grid); mask = Integer(1); | ||||
|     LatticeInteger zi(grid);     zi = Integer(0); | ||||
|     for(int d=0;d<Nd;d++){ | ||||
|       Integer B= Block[d]; | ||||
|       if ( B ) { | ||||
| 	LatticeCoordinate(coor,d+isDWF); | ||||
| 	domaincoor = mod(coor,B); | ||||
| 	mask = where(domaincoor==Integer(0),zi,mask); | ||||
| 	mask = where(domaincoor==Integer(B-1),zi,mask); | ||||
|       } | ||||
|     } | ||||
|     if ( !domain ) | ||||
|       f = where(mask==Integer(1),f,zz); | ||||
|     else  | ||||
|       f = where(mask==Integer(0),f,zz); | ||||
|   }; | ||||
|   template<class GaugeField> | ||||
|   void ProjectDDHMC(GaugeField &U) | ||||
|   { | ||||
|     GridBase *grid = U.Grid(); | ||||
|     Coordinate Global=grid->GlobalDimensions(); | ||||
|     GaugeField zzz(grid); zzz = Zero(); | ||||
|     LatticeInteger coor(grid);  | ||||
|  | ||||
|     GaugeField Uorg(grid); Uorg = U; | ||||
|      | ||||
|     auto zzz_mu = PeekIndex<LorentzIndex>(zzz,0); | ||||
|     //////////////////////////////////////////////////// | ||||
|     // Zero BDY layers | ||||
|     //////////////////////////////////////////////////// | ||||
|     for(int mu=0;mu<Nd;mu++) { | ||||
|       Integer B1 = Block[mu]; | ||||
|       if ( B1 && (B1 <= Global[mu]) ) { | ||||
| 	LatticeCoordinate(coor,mu); | ||||
|  | ||||
|  | ||||
| 	//////////////////////////////// | ||||
| 	// OmegaBar - zero all links contained in slice B-1,0 and | ||||
| 	// mu links connecting to Omega | ||||
| 	//////////////////////////////// | ||||
|  | ||||
| 	U    = where(mod(coor,B1)==Integer(B1-1),zzz,U); | ||||
| 	U    = where(mod(coor,B1)==Integer(0)   ,zzz,U);  | ||||
|  | ||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||
| 	U_mu = where(mod(coor,B1)==Integer(B1-2),zzz_mu,U_mu);  | ||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||
|  | ||||
|       } | ||||
|     } | ||||
|     | ||||
|     //////////////////////////////////////////// | ||||
|     // Omega interior slow the evolution | ||||
|     // Tricky as we need to take the smallest of values imposed by each cut | ||||
|     // Do them in order or largest to smallest and smallest writes last | ||||
|     //////////////////////////////////////////// | ||||
|     RealD f= factor; | ||||
| #if 0     | ||||
|     for(int mu=0;mu<Nd;mu++) { | ||||
|       Integer B1 = Block[mu]; | ||||
|       if ( B1 && (B1 <= Global[mu]) ) { | ||||
|  | ||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||
| 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); | ||||
| 	// In the plane | ||||
| 	U = where(mod(coor,B1)==Integer(B1-5),Uorg*f,U);  | ||||
| 	U = where(mod(coor,B1)==Integer(4)   ,Uorg*f,U);  | ||||
|  | ||||
| 	// Perp links | ||||
|        	U_mu = where(mod(coor,B1)==Integer(B1-6),Uorg_mu*f,U_mu); | ||||
| 	U_mu = where(mod(coor,B1)==Integer(4)   ,Uorg_mu*f,U_mu); | ||||
|  | ||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||
|       } | ||||
|     } | ||||
| #endif | ||||
|     for(int mu=0;mu<Nd;mu++) { | ||||
|       Integer B1 = Block[mu]; | ||||
|       if ( B1 && (B1 <= Global[mu]) ) { | ||||
|  | ||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||
| 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); | ||||
| 	// In the plane | ||||
| 	U = where(mod(coor,B1)==Integer(B1-4),Uorg*f*f,U);  | ||||
| 	U = where(mod(coor,B1)==Integer(3)   ,Uorg*f*f,U);  | ||||
|  | ||||
| 	// Perp links | ||||
|        	U_mu = where(mod(coor,B1)==Integer(B1-5),Uorg_mu*f*f,U_mu); | ||||
| 	U_mu = where(mod(coor,B1)==Integer(3)   ,Uorg_mu*f*f,U_mu); | ||||
|  | ||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||
|       } | ||||
|     } | ||||
|     for(int mu=0;mu<Nd;mu++) { | ||||
|       Integer B1 = Block[mu]; | ||||
|       if ( B1 && (B1 <= Global[mu]) ) { | ||||
|  | ||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||
| 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); | ||||
| 	// In the plane | ||||
| 	U = where(mod(coor,B1)==Integer(B1-3),Uorg*f*f*f,U);  | ||||
| 	U = where(mod(coor,B1)==Integer(2)   ,Uorg*f*f*f,U);  | ||||
|  | ||||
| 	// Perp links | ||||
|        	U_mu = where(mod(coor,B1)==Integer(B1-4),Uorg_mu*f*f*f,U_mu); | ||||
| 	U_mu = where(mod(coor,B1)==Integer(2)   ,Uorg_mu*f*f*f,U_mu); | ||||
|  | ||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||
|       } | ||||
|     } | ||||
|     for(int mu=0;mu<Nd;mu++) { | ||||
|       Integer B1 = Block[mu]; | ||||
|       if ( B1 && (B1 <= Global[mu]) ) { | ||||
|  | ||||
| 	auto U_mu   = PeekIndex<LorentzIndex>(U,mu); | ||||
| 	auto Uorg_mu= PeekIndex<LorentzIndex>(Uorg,mu); | ||||
| 	// In the plane | ||||
| 	U = where(mod(coor,B1)==Integer(B1-2),zzz,U);  | ||||
| 	U = where(mod(coor,B1)==Integer(1)   ,zzz,U);  | ||||
|  | ||||
| 	// Perp links | ||||
| 	U_mu = where(mod(coor,B1)==Integer(B1-3),Uorg_mu*f*f*f*f,U_mu); | ||||
| 	U_mu = where(mod(coor,B1)==Integer(1)   ,Uorg_mu*f*f*f*f,U_mu); | ||||
|  | ||||
| 	PokeIndex<LorentzIndex>(U, U_mu, mu); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
| @@ -60,8 +60,6 @@ public: | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   virtual void Dminus(const FermionField &psi, FermionField &chi); | ||||
|   virtual void DminusDag(const FermionField &psi, FermionField &chi); | ||||
|   virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported); | ||||
|   virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported); | ||||
|   virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); | ||||
|   virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d); | ||||
|   virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); | ||||
|   | ||||
| @@ -1,185 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/DirichletFermionOperator.h | ||||
|  | ||||
|     Copyright (C) 2021 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| //////////////////////////////////////////////////////////////// | ||||
| // Wrap a fermion operator in Dirichlet BC's at node boundary | ||||
| //////////////////////////////////////////////////////////////// | ||||
|      | ||||
| template<class Impl> | ||||
| class DirichletFermionOperator : public FermionOperator<Impl> | ||||
| { | ||||
| public: | ||||
|  | ||||
|   INHERIT_IMPL_TYPES(Impl); | ||||
|  | ||||
|   // Data members | ||||
|   int CommsMode; | ||||
|   Coordinate Block; | ||||
|   DirichletFilter<GaugeField> Filter; | ||||
|   FermionOperator<Impl> & FermOp; | ||||
|    | ||||
|   // Constructor / bespoke | ||||
|   DirichletFermionOperator(FermionOperator<Impl> & _FermOp, Coordinate &_Block) | ||||
|     : FermOp(_FermOp), Block(_Block), Filter(Block) | ||||
|   { | ||||
|     // Save what the comms mode should be under normal BCs | ||||
|     CommsMode = WilsonKernelsStatic::Comms; | ||||
|     assert((CommsMode == WilsonKernelsStatic::CommsAndCompute) | ||||
|          ||(CommsMode == WilsonKernelsStatic::CommsThenCompute)); | ||||
|  | ||||
|     // Check the block size divides local lattice | ||||
|     GridBase *grid = FermOp.GaugeGrid(); | ||||
|  | ||||
|     int blocks_per_rank = 1; | ||||
|     Coordinate LocalDims = grid->LocalDimensions(); | ||||
|     Coordinate GlobalDims= grid->GlobalDimensions(); | ||||
|     assert(Block.size()==LocalDims.size()); | ||||
|  | ||||
|     for(int d=0;d<LocalDims.size();d++){ | ||||
|       if (Block[d]&&(Block[d]<=GlobalDims[d])){ | ||||
| 	int r = LocalDims[d] % Block[d]; | ||||
| 	assert(r == 0); | ||||
| 	blocks_per_rank *= (LocalDims[d] / Block[d]); | ||||
|       } | ||||
|     } | ||||
|     // Even blocks per node required // could be relaxed but inefficient use of hardware as idle nodes in boundary operator R | ||||
|     assert( blocks_per_rank != 0); | ||||
|  | ||||
|     // Possible checks that SIMD lanes are used with full occupancy??? | ||||
|   }; | ||||
|   virtual ~DirichletFermionOperator(void) = default; | ||||
|  | ||||
|   void DirichletOn(void)   { | ||||
|     assert(WilsonKernelsStatic::Comms!= WilsonKernelsStatic::CommsDirichlet); | ||||
|     //    WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsDirichlet; | ||||
|   } | ||||
|   void DirichletOff(void)  { | ||||
|     //    assert(WilsonKernelsStatic::Comms== WilsonKernelsStatic::CommsDirichlet); | ||||
|     //    WilsonKernelsStatic::Comms = CommsMode; | ||||
|   } | ||||
|  | ||||
|   // Implement the full interface | ||||
|   virtual FermionField &tmp(void) { return FermOp.tmp(); }; | ||||
|  | ||||
|   virtual GridBase *FermionGrid(void)         { return FermOp.FermionGrid(); } | ||||
|   virtual GridBase *FermionRedBlackGrid(void) { return FermOp.FermionRedBlackGrid(); } | ||||
|   virtual GridBase *GaugeGrid(void)           { return FermOp.GaugeGrid(); } | ||||
|   virtual GridBase *GaugeRedBlackGrid(void)   { return FermOp.GaugeRedBlackGrid(); } | ||||
|    | ||||
|   // override multiply | ||||
|   virtual void  M    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.M(in,out);    DirichletOff();  }; | ||||
|   virtual void  Mdag (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Mdag(in,out); DirichletOff();  }; | ||||
|  | ||||
|   // half checkerboard operaions | ||||
|   virtual void   Meooe       (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Meooe(in,out);    DirichletOff(); };   | ||||
|   virtual void   MeooeDag    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MeooeDag(in,out); DirichletOff(); }; | ||||
|   virtual void   Mooee       (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.Mooee(in,out);    DirichletOff(); }; | ||||
|   virtual void   MooeeDag    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeDag(in,out); DirichletOff(); }; | ||||
|   virtual void   MooeeInv    (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeInv(in,out); DirichletOff(); }; | ||||
|   virtual void   MooeeInvDag (const FermionField &in, FermionField &out) { DirichletOn(); FermOp.MooeeInvDag(in,out); DirichletOff(); }; | ||||
|  | ||||
|   // non-hermitian hopping term; half cb or both | ||||
|   virtual void Dhop  (const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.Dhop(in,out,dag);    DirichletOff(); }; | ||||
|   virtual void DhopOE(const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.DhopOE(in,out,dag);  DirichletOff(); }; | ||||
|   virtual void DhopEO(const FermionField &in, FermionField &out,int dag) { DirichletOn(); FermOp.DhopEO(in,out,dag);  DirichletOff(); }; | ||||
|   virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp) { DirichletOn(); FermOp.DhopDir(in,out,dir,disp);  DirichletOff(); }; | ||||
|  | ||||
|   // force terms; five routines; default to Dhop on diagonal | ||||
|   virtual void MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MDeriv(mat,U,V,dag);}; | ||||
|   virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MoeDeriv(mat,U,V,dag);}; | ||||
|   virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MeoDeriv(mat,U,V,dag);}; | ||||
|   virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MooDeriv(mat,U,V,dag);}; | ||||
|   virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.MeeDeriv(mat,U,V,dag);}; | ||||
|  | ||||
|   virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDeriv(mat,U,V,dag);}; | ||||
|   virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDerivEO(mat,U,V,dag);}; | ||||
|   virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){FermOp.DhopDerivOE(mat,U,V,dag);}; | ||||
|  | ||||
|   virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);}; | ||||
|   virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp){FermOp.Mdir(in,out,dir,disp);}; | ||||
|   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)    {FermOp.MdirAll(in,out);}; | ||||
|  | ||||
|   /////////////////////////////////////////////// | ||||
|   // Updates gauge field during HMC | ||||
|   /////////////////////////////////////////////// | ||||
|   DoubledGaugeField &GetDoubledGaugeField(void){ return FermOp.GetDoubledGaugeField(); }; | ||||
|   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return FermOp.GetDoubledGaugeFieldE(); }; | ||||
|   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return FermOp.GetDoubledGaugeFieldO(); }; | ||||
|   virtual void ImportGauge(const GaugeField & _U) | ||||
|   { | ||||
|     GaugeField U = _U; | ||||
|     // Filter gauge field to apply Dirichlet | ||||
|     Filter.applyFilter(U); | ||||
|     FermOp.ImportGauge(U); | ||||
|   } | ||||
|   /////////////////////////////////////////////// | ||||
|   // Physical field import/export | ||||
|   /////////////////////////////////////////////// | ||||
|   virtual void Dminus(const FermionField &psi, FermionField &chi)    { FermOp.Dminus(psi,chi); } | ||||
|   virtual void DminusDag(const FermionField &psi, FermionField &chi) { FermOp.DminusDag(psi,chi); } | ||||
|   virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported)   { FermOp.ImportFourDimPseudoFermion(input,imported);} | ||||
|   virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported){ FermOp.ExportFourDimPseudoFermion(solution,exported);} | ||||
|   virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported)  { FermOp.ImportPhysicalFermionSource(input,imported);} | ||||
|   virtual void ImportUnphysicalFermion(const FermionField &input,FermionField &imported)      { FermOp.ImportUnphysicalFermion(input,imported);} | ||||
|   virtual void ExportPhysicalFermionSolution(const FermionField &solution,FermionField &exported) {FermOp.ExportPhysicalFermionSolution(solution,exported);} | ||||
|   virtual void ExportPhysicalFermionSource(const FermionField &solution,FermionField &exported)   {FermOp.ExportPhysicalFermionSource(solution,exported);} | ||||
|   ////////////////////////////////////////////////////////////////////// | ||||
|   // Should never be used | ||||
|   ////////////////////////////////////////////////////////////////////// | ||||
|   virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; | ||||
|   virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {assert(0);} | ||||
|   virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) { assert(0);} | ||||
|   virtual void ContractConservedCurrent(PropagatorField &q_in_1, | ||||
| 					PropagatorField &q_in_2, | ||||
| 					PropagatorField &q_out, | ||||
| 					PropagatorField &phys_src, | ||||
| 					Current curr_type, | ||||
| 					unsigned int mu) | ||||
|   {assert(0);}; | ||||
|   virtual void SeqConservedCurrent(PropagatorField &q_in,  | ||||
| 				   PropagatorField &q_out, | ||||
| 				   PropagatorField &phys_src, | ||||
| 				   Current curr_type, | ||||
| 				   unsigned int mu, | ||||
| 				   unsigned int tmin,  | ||||
| 				   unsigned int tmax, | ||||
| 				   ComplexField &lattice_cmplx) | ||||
|   {assert(0);}; | ||||
|       // Only reimplemented in Wilson5D  | ||||
|       // Default to just a zero correlation function | ||||
|   virtual void ContractJ5q(FermionField &q_in   ,ComplexField &J5q) { J5q=Zero(); }; | ||||
|   virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=Zero(); }; | ||||
|    | ||||
| }; | ||||
|  | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
| @@ -101,12 +101,6 @@ NAMESPACE_CHECK(WilsonTM5); | ||||
| #include <Grid/qcd/action/fermion/PauliVillarsInverters.h> | ||||
| #include <Grid/qcd/action/fermion/Reconstruct5Dprop.h> | ||||
| #include <Grid/qcd/action/fermion/MADWF.h> | ||||
| //////////////////////////////////////////////////////////////////// | ||||
| // DDHMC related  | ||||
| //////////////////////////////////////////////////////////////////// | ||||
| #include <Grid/qcd/action/fermion/DirichletFermionOperator.h> | ||||
| #include <Grid/qcd/action/fermion/SchurFactoredFermionOperator.h> | ||||
|  | ||||
| NAMESPACE_CHECK(DWFutils); | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -121,9 +115,9 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR; | ||||
| typedef WilsonFermion<WilsonImplF> WilsonFermionF; | ||||
| typedef WilsonFermion<WilsonImplD> WilsonFermionD; | ||||
|  | ||||
| //typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; | ||||
| //typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; | ||||
| //typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; | ||||
| typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; | ||||
| typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; | ||||
| typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; | ||||
|  | ||||
| typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; | ||||
| typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; | ||||
| @@ -164,41 +158,41 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR; | ||||
| typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; | ||||
| typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; | ||||
|  | ||||
| //typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; | ||||
| //typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; | ||||
| //typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; | ||||
| typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; | ||||
| typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; | ||||
| typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; | ||||
|  | ||||
| typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; | ||||
| typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; | ||||
| typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; | ||||
|  | ||||
| //typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; | ||||
| //typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; | ||||
| //typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; | ||||
| typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; | ||||
| typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; | ||||
| typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; | ||||
|  | ||||
| typedef MobiusFermion<WilsonImplR> MobiusFermionR; | ||||
| typedef MobiusFermion<WilsonImplF> MobiusFermionF; | ||||
| typedef MobiusFermion<WilsonImplD> MobiusFermionD; | ||||
|  | ||||
| //typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; | ||||
| //typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; | ||||
| //typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; | ||||
| typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; | ||||
| typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; | ||||
| typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; | ||||
|  | ||||
| typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; | ||||
| typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; | ||||
| typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; | ||||
|  | ||||
| //typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; | ||||
| //typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; | ||||
| //typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; | ||||
| typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; | ||||
| typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; | ||||
| typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; | ||||
|  | ||||
| typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; | ||||
| typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; | ||||
| typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; | ||||
|  | ||||
| //typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; | ||||
| //typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; | ||||
| //typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; | ||||
| typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; | ||||
| typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; | ||||
| typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; | ||||
|  | ||||
| // Ls vectorised | ||||
| typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; | ||||
| @@ -241,49 +235,49 @@ typedef WilsonFermion<GparityWilsonImplR>     GparityWilsonFermionR; | ||||
| typedef WilsonFermion<GparityWilsonImplF>     GparityWilsonFermionF; | ||||
| typedef WilsonFermion<GparityWilsonImplD>     GparityWilsonFermionD; | ||||
|  | ||||
| //typedef WilsonFermion<GparityWilsonImplRL>     GparityWilsonFermionRL; | ||||
| //typedef WilsonFermion<GparityWilsonImplFH>     GparityWilsonFermionFH; | ||||
| //typedef WilsonFermion<GparityWilsonImplDF>     GparityWilsonFermionDF; | ||||
| typedef WilsonFermion<GparityWilsonImplRL>     GparityWilsonFermionRL; | ||||
| typedef WilsonFermion<GparityWilsonImplFH>     GparityWilsonFermionFH; | ||||
| typedef WilsonFermion<GparityWilsonImplDF>     GparityWilsonFermionDF; | ||||
|  | ||||
| typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; | ||||
| typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; | ||||
| typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; | ||||
|  | ||||
| //typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; | ||||
| //typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; | ||||
| //typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; | ||||
| typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; | ||||
| typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; | ||||
| typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; | ||||
|  | ||||
| typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; | ||||
| typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; | ||||
| typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; | ||||
|  | ||||
| //typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; | ||||
| //typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; | ||||
| //typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; | ||||
| typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; | ||||
| typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; | ||||
| typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; | ||||
|  | ||||
| typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; | ||||
| typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; | ||||
| typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; | ||||
|  | ||||
| //typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; | ||||
| //typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; | ||||
| //typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; | ||||
| typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; | ||||
| typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; | ||||
| typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; | ||||
|  | ||||
| typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; | ||||
| typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; | ||||
| typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; | ||||
|  | ||||
| //typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; | ||||
| //typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; | ||||
| //typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; | ||||
| typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; | ||||
| typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; | ||||
| typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; | ||||
|  | ||||
| typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; | ||||
| typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; | ||||
| typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; | ||||
|  | ||||
| //typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; | ||||
| //typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; | ||||
| //typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; | ||||
| typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; | ||||
| typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; | ||||
| typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; | ||||
|  | ||||
| typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; | ||||
| typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; | ||||
|   | ||||
| @@ -25,7 +25,8 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk> | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #pragma once | ||||
| #ifndef  GRID_QCD_FERMION_CORE_H | ||||
| #define  GRID_QCD_FERMION_CORE_H | ||||
|  | ||||
| #include <Grid/GridCore.h> | ||||
| #include <Grid/GridQCDcore.h> | ||||
| @@ -44,3 +45,4 @@ NAMESPACE_CHECK(FermionOperator); | ||||
| #include <Grid/qcd/action/fermion/StaggeredKernels.h>        //used by all wilson type fermions | ||||
| NAMESPACE_CHECK(Kernels); | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -140,9 +140,6 @@ public: | ||||
|   // Updates gauge field during HMC | ||||
|   /////////////////////////////////////////////// | ||||
|   virtual void ImportGauge(const GaugeField & _U)=0; | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeField(void)  =0; | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void)  =0; | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void)  =0; | ||||
|  | ||||
|   ////////////////////////////////////////////////////////////////////// | ||||
|   // Conserved currents, either contract at sink or insert sequentially. | ||||
| @@ -174,16 +171,6 @@ public: | ||||
|       /////////////////////////////////////////////// | ||||
|       virtual void Dminus(const FermionField &psi, FermionField &chi)    { chi=psi; } | ||||
|       virtual void DminusDag(const FermionField &psi, FermionField &chi) { chi=psi; } | ||||
|  | ||||
|       virtual void ImportFourDimPseudoFermion(const FermionField &input,FermionField &imported) | ||||
|       { | ||||
| 	imported = input; | ||||
|       }; | ||||
|       virtual void ExportFourDimPseudoFermion(const FermionField &solution,FermionField &exported) | ||||
|       { | ||||
| 	exported=solution; | ||||
|       }; | ||||
|  | ||||
|       virtual void ImportPhysicalFermionSource(const FermionField &input,FermionField &imported) | ||||
|       { | ||||
| 	imported = input; | ||||
|   | ||||
| @@ -30,18 +30,6 @@ directory | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| /* | ||||
|   Policy implementation for G-parity boundary conditions | ||||
|  | ||||
|   Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular | ||||
|   field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first | ||||
|   with the regular links we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field | ||||
|   to avoid communicating links when applying the Dirac operator, such that the double-stored field contains also a flavor index which maps to | ||||
|   either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor. | ||||
|  | ||||
|   Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  | ||||
|   mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs | ||||
|  */ | ||||
| template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal> | ||||
| class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > { | ||||
| public: | ||||
| @@ -125,7 +113,7 @@ public: | ||||
|     || ((distance== 1)&&(icoor[direction]==1)) | ||||
|     || ((distance==-1)&&(icoor[direction]==0)); | ||||
|  | ||||
|     permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction | ||||
|     permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world | ||||
|  | ||||
|     //Apply the links | ||||
|     int f_upper = permute_lane ? 1 : 0; | ||||
| @@ -151,10 +139,10 @@ public: | ||||
|     assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code | ||||
|     assert((sl == 1) || (sl == 2)); | ||||
|  | ||||
|     //If this site is an global boundary site, perform the G-parity flavor twist | ||||
|     if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) { | ||||
|     if ( SE->_around_the_world && St.parameters.twists[mmu] ) { | ||||
|  | ||||
|       if ( sl == 2 ) { | ||||
| 	//Only do the twist for lanes on the edge of the physical node | ||||
|         | ||||
| 	ExtractBuffer<sobj> vals(Nsimd); | ||||
|  | ||||
| 	extract(chi,vals); | ||||
| @@ -209,19 +197,6 @@ public: | ||||
|     reg = memory; | ||||
|   } | ||||
|  | ||||
|  | ||||
|   //Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds | ||||
|   inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){ | ||||
|     autoView(poke_f0_v, poke_f0, CpuRead); | ||||
|     autoView(poke_f1_v, poke_f1, CpuRead); | ||||
|     autoView(Uds_v, Uds, CpuWrite); | ||||
|     thread_foreach(ss,poke_f0_v,{ | ||||
| 	Uds_v[ss](0)(mu) = poke_f0_v[ss](); | ||||
| 	Uds_v[ss](1)(mu) = poke_f1_v[ss](); | ||||
|       }); | ||||
|   } | ||||
|      | ||||
|  | ||||
|   inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) | ||||
|   { | ||||
|     conformable(Uds.Grid(),GaugeGrid); | ||||
| @@ -232,19 +207,14 @@ public: | ||||
|     GaugeLinkField Uconj(GaugeGrid); | ||||
|     | ||||
|     Lattice<iScalar<vInteger> > coor(GaugeGrid); | ||||
|  | ||||
|     //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction.  | ||||
|     //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs         | ||||
|     for(int mu=0;mu<Nd-1;mu++){ | ||||
|  | ||||
|       if( Params.twists[mu] ){ | ||||
| 	LatticeCoordinate(coor,mu); | ||||
|       } | ||||
|          | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|            | ||||
|       LatticeCoordinate(coor,mu); | ||||
|            | ||||
|       U     = PeekIndex<LorentzIndex>(Umu,mu); | ||||
|       Uconj = conjugate(U); | ||||
|       | ||||
|       // Implement the isospin rotation sign on the boundary between f=1 and f=0 | ||||
|       // This phase could come from a simple bc 1,1,-1,1 .. | ||||
|       int neglink = GaugeGrid->GlobalDimensions()[mu]-1; | ||||
|       if ( Params.twists[mu] ) {  | ||||
| @@ -259,7 +229,7 @@ public: | ||||
| 	thread_foreach(ss,U_v,{ | ||||
| 	    Uds_v[ss](0)(mu) = U_v[ss](); | ||||
| 	    Uds_v[ss](1)(mu) = Uconj_v[ss](); | ||||
| 	}); | ||||
| 	  }); | ||||
|       } | ||||
|            | ||||
|       U     = adj(Cshift(U    ,mu,-1));      // correct except for spanning the boundary | ||||
| @@ -290,38 +260,6 @@ public: | ||||
|         }); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     { //periodic / antiperiodic temporal BCs | ||||
|       int mu = Nd-1; | ||||
|       int L   = GaugeGrid->GlobalDimensions()[mu]; | ||||
|       int Lmu = L - 1; | ||||
|  | ||||
|       LatticeCoordinate(coor, mu); | ||||
|  | ||||
|       U = PeekIndex<LorentzIndex>(Umu, mu); //Get t-directed links | ||||
|        | ||||
|       GaugeLinkField *Upoke = &U; | ||||
|  | ||||
|       if(Params.twists[mu]){ //antiperiodic | ||||
| 	Utmp =  where(coor == Lmu, -U, U); | ||||
| 	Upoke = &Utmp; | ||||
|       } | ||||
|      | ||||
|       Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links       | ||||
|       pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu); | ||||
|  | ||||
|       //Get the barrel-shifted field | ||||
|       Utmp = adj(Cshift(U, mu, -1)); //is a forward shift! | ||||
|       Upoke = &Utmp; | ||||
|  | ||||
|       if(Params.twists[mu]){ | ||||
| 	U = where(coor == 0, -Utmp, Utmp);  //boundary phase | ||||
| 	Upoke = &U; | ||||
|       } | ||||
|        | ||||
|       Uconj = conjugate(*Upoke); | ||||
|       pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4); | ||||
|     } | ||||
|   } | ||||
|        | ||||
|   inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) { | ||||
| @@ -360,48 +298,28 @@ public: | ||||
|   inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){ | ||||
|     assert(0); | ||||
|   } | ||||
|   | ||||
|   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { | ||||
|     int Ls=Btilde.Grid()->_fdimensions[0]; | ||||
|      | ||||
|     { | ||||
|       GridBase *GaugeGrid = mat.Grid(); | ||||
|       Lattice<iScalar<vInteger> > coor(GaugeGrid); | ||||
|  | ||||
|       if( Params.twists[mu] ){ | ||||
| 	LatticeCoordinate(coor,mu); | ||||
|       } | ||||
|  | ||||
|       autoView( mat_v , mat, AcceleratorWrite); | ||||
|       autoView( Btilde_v , Btilde, AcceleratorRead); | ||||
|       autoView( Atilde_v , Atilde, AcceleratorRead); | ||||
|       accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{	   | ||||
|   	  int sU=sss; | ||||
|   	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType; | ||||
|   	  ColorMatrixType sum; | ||||
|   	  zeroit(sum); | ||||
|   	  for(int s=0;s<Ls;s++){ | ||||
|   	    int sF = s+Ls*sU; | ||||
|   	    for(int spn=0;spn<Ns;spn++){ //sum over spin | ||||
| 	      //Flavor 0 | ||||
|   	      auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector | ||||
|   	      auto aa = coalescedRead(Atilde_v[sF](0)(spn) ); | ||||
|   	      sum = sum + outerProduct(bb,aa); | ||||
|  | ||||
|   	      //Flavor 1 | ||||
|   	      bb = coalescedRead(Btilde_v[sF](1)(spn) ); | ||||
|   	      aa = coalescedRead(Atilde_v[sF](1)(spn) ); | ||||
|   	      sum = sum + conjugate(outerProduct(bb,aa)); | ||||
|   	    } | ||||
|   	  }	     | ||||
|   	  coalescedWrite(mat_v[sU](mu)(), sum); | ||||
|   	}); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  | ||||
|    | ||||
|   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { | ||||
|  | ||||
|     int Ls = Btilde.Grid()->_fdimensions[0]; | ||||
|          | ||||
|     GaugeLinkField tmp(mat.Grid()); | ||||
|     tmp = Zero(); | ||||
|     { | ||||
|       autoView( tmp_v , tmp, CpuWrite); | ||||
|       autoView( Atilde_v , Atilde, CpuRead); | ||||
|       autoView( Btilde_v , Btilde, CpuRead); | ||||
|       thread_for(ss,tmp.Grid()->oSites(),{ | ||||
| 	  for (int s = 0; s < Ls; s++) { | ||||
| 	    int sF = s + Ls * ss; | ||||
| 	    auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF])); | ||||
| 	    tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); | ||||
| 	  } | ||||
| 	}); | ||||
|     } | ||||
|     PokeIndex<LorentzIndex>(mat, tmp, mu); | ||||
|     return; | ||||
|   } | ||||
|    | ||||
| }; | ||||
|  | ||||
| @@ -409,8 +327,8 @@ typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> Gparit | ||||
| typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF;  // Float | ||||
| typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD;  // Double | ||||
|   | ||||
| //typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL;  // Real.. whichever prec | ||||
| //typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH;  // Float | ||||
| //typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF;  // Double | ||||
| typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL;  // Real.. whichever prec | ||||
| typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH;  // Float | ||||
| typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF;  // Double | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|   | ||||
| @@ -141,11 +141,8 @@ public: | ||||
|   void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat); | ||||
|   void ImportGaugeSimple(const GaugeField &_UUU    ,const GaugeField &_U); | ||||
|   void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeField(void)  override { return Umu; }; | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void) override { return UmuEven; }; | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void) override { return UmuOdd; }; | ||||
|   virtual DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||
|   virtual DoubledGaugeField &GetUUU(void) { return UUUmu; }; | ||||
|   DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||
|   DoubledGaugeField &GetUUU(void) { return UUUmu; }; | ||||
|   void CopyGaugeCheckerboards(void); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -160,20 +160,17 @@ public: | ||||
| 			       RealD _c1=1.0, RealD _c2=1.0,RealD _u0=1.0, | ||||
| 			     const ImplParams &p= ImplParams()); | ||||
|      | ||||
|   // DoubleStore gauge field in operator | ||||
|   void ImportGauge      (const GaugeField &_Uthin ) { assert(0); } | ||||
|     // DoubleStore gauge field in operator | ||||
|     void ImportGauge      (const GaugeField &_Uthin ) { assert(0); } | ||||
|   void ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat); | ||||
|   void ImportGaugeSimple(const GaugeField &_UUU,const GaugeField &_U); | ||||
|   void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); | ||||
|   // Give a reference; can be used to do an assignment or copy back out after import | ||||
|   // if Carleton wants to cache them and not use the ImportSimple | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeField(void)  override { return Umu; }; | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldE(void) override { return UmuEven; }; | ||||
|   virtual DoubledGaugeField &GetDoubledGaugeFieldO(void) override { return UmuOdd; }; | ||||
|   DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||
|   DoubledGaugeField &GetUUU(void) { return UUUmu; }; | ||||
|   void CopyGaugeCheckerboards(void); | ||||
|    | ||||
|     void ImportGaugeSimple(const GaugeField &_UUU,const GaugeField &_U); | ||||
|     void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); | ||||
|     // Give a reference; can be used to do an assignment or copy back out after import | ||||
|     // if Carleton wants to cache them and not use the ImportSimple | ||||
|     DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||
|     DoubledGaugeField &GetUUU(void) { return UUUmu; }; | ||||
|     void CopyGaugeCheckerboards(void); | ||||
|      | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Data members require to support the functionality | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -135,9 +135,6 @@ public: | ||||
|  | ||||
|   // DoubleStore impl dependent | ||||
|   void ImportGauge      (const GaugeField &_U ); | ||||
|   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; | ||||
|   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; | ||||
|   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; | ||||
|   DoubledGaugeField &GetU(void)   { return Umu ; } ; | ||||
|   void CopyGaugeCheckerboards(void); | ||||
|  | ||||
|   | ||||
| @@ -1,534 +0,0 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/qcd/action/fermion/SchurFactoredFermionOperator.h | ||||
|  | ||||
|     Copyright (C) 2021 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| #include <Grid/qcd/utils/MixedPrecisionOperatorFunction.h> | ||||
| #include <Grid/qcd/action/domains/Domains.h> | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
|   //////////////////////////////////////////////////////// | ||||
|   // Some explanation of class structure for domain decomposition: | ||||
|   // | ||||
|   // Need a dirichlet operator for two flavour determinant - acts on both Omega and OmegaBar. | ||||
|   // | ||||
|   // Possible gain if the global sums and CG are run independently?? Could measure this. | ||||
|   // | ||||
|   // Types of operations | ||||
|   // | ||||
|   // 1) assemble local det dOmega det dOmegaBar pseudofermion | ||||
|   // | ||||
|   // - DirichletFermionOperator - can either do a global solve, or independent/per cell coefficients. | ||||
|   // | ||||
|   // 2) assemble dOmegaInverse and dOmegaBarInverse in R | ||||
|   // | ||||
|   // - DirichletFermionOperator - can also be used to  | ||||
|   //                                       - need two or more cells per node. Options | ||||
|   //                                       - a) solve one cell at a time, no new code, CopyRegion and reduced /split Grids | ||||
|   //                                       - b) solve multiple cells in parallel. predicated dslash implementation | ||||
|   // | ||||
|   //                                       - b) has more parallelism, experience with block solver suggest might not be aalgorithmically inefficient | ||||
|   //                                         a) has more cache friendly and easier code. | ||||
|   //                                         b) is easy to implement in a "trial" or inefficient code with projection. | ||||
|   // | ||||
|   // 3)  Additional functionality for domain operations | ||||
|   // | ||||
|   // - SchurFactoredFermionOperator  - Need a DDHMC utility - whether used in two flavour or one flavour  | ||||
|   // | ||||
|   // - dBoundary - needs non-dirichlet operator | ||||
|   // - Contains one Dirichlet Op, and one non-Dirichlet op. Implements dBoundary etc... | ||||
|   // - The Dirichlet ops can be passed to dOmega(Bar) solvers etc... | ||||
|   // | ||||
|   //////////////////////////////////////////////////////// | ||||
|  | ||||
|  | ||||
| template<class ImplD,class ImplF> | ||||
| class SchurFactoredFermionOperator : public ImplD | ||||
| { | ||||
|   INHERIT_IMPL_TYPES(ImplD); | ||||
|    | ||||
|   typedef typename ImplF::FermionField FermionFieldF; | ||||
|   typedef typename ImplD::FermionField FermionFieldD; | ||||
|  | ||||
|   typedef SchurDiagMooeeOperator<FermionOperator<ImplD>,FermionFieldD> LinearOperatorD; | ||||
|   typedef SchurDiagMooeeOperator<FermionOperator<ImplF>,FermionFieldF> LinearOperatorF; | ||||
|   typedef SchurDiagMooeeDagOperator<FermionOperator<ImplD>,FermionFieldD> LinearOperatorDagD; | ||||
|   typedef SchurDiagMooeeDagOperator<FermionOperator<ImplF>,FermionFieldF> LinearOperatorDagF; | ||||
|  | ||||
|   typedef MixedPrecisionConjugateGradientOperatorFunction<FermionOperator<ImplD>, | ||||
| 							  FermionOperator<ImplF>, | ||||
| 							  LinearOperatorD, | ||||
| 							  LinearOperatorF> MxPCG; | ||||
|  | ||||
|   typedef MixedPrecisionConjugateGradientOperatorFunction<FermionOperator<ImplD>, | ||||
| 							  FermionOperator<ImplF>, | ||||
| 							  LinearOperatorDagD, | ||||
| 							  LinearOperatorDagF> MxDagPCG; | ||||
| public: | ||||
|  | ||||
|   GridBase *FermionGrid(void) { return PeriodicFermOpD.FermionGrid(); }; | ||||
|   GridBase *GaugeGrid(void)   { return PeriodicFermOpD.GaugeGrid(); }; | ||||
|    | ||||
|   FermionOperator<ImplD> & DirichletFermOpD; | ||||
|   FermionOperator<ImplF> & DirichletFermOpF; | ||||
|   FermionOperator<ImplD> & PeriodicFermOpD;  | ||||
|   FermionOperator<ImplF> & PeriodicFermOpF;  | ||||
|  | ||||
|   LinearOperatorD DirichletLinOpD; | ||||
|   LinearOperatorF DirichletLinOpF; | ||||
|   LinearOperatorD PeriodicLinOpD; | ||||
|   LinearOperatorF PeriodicLinOpF; | ||||
|  | ||||
|   LinearOperatorDagD DirichletLinOpDagD; | ||||
|   LinearOperatorDagF DirichletLinOpDagF; | ||||
|   LinearOperatorDagD PeriodicLinOpDagD; | ||||
|   LinearOperatorDagF PeriodicLinOpDagF; | ||||
|  | ||||
|   // Can tinker with these in the pseudofermion for force vs. action solves | ||||
|   Integer maxinnerit; | ||||
|   Integer maxouterit; | ||||
|   RealD tol; | ||||
|   RealD tolinner; | ||||
|    | ||||
|   Coordinate Block; | ||||
|  | ||||
|   DomainDecomposition Domains; | ||||
|  | ||||
|   SchurFactoredFermionOperator(FermionOperator<ImplD>  & _PeriodicFermOpD, | ||||
| 			       FermionOperator<ImplF>  & _PeriodicFermOpF, | ||||
| 			       FermionOperator<ImplD>  & _DirichletFermOpD, | ||||
| 			       FermionOperator<ImplF>  & _DirichletFermOpF, | ||||
| 			       Coordinate &_Block) | ||||
|     : Block(_Block), Domains(Block), | ||||
|  | ||||
|       PeriodicFermOpD(_PeriodicFermOpD), | ||||
|       PeriodicFermOpF(_PeriodicFermOpF), | ||||
|       DirichletFermOpD(_DirichletFermOpD), | ||||
|       DirichletFermOpF(_DirichletFermOpF), | ||||
|       DirichletLinOpD(DirichletFermOpD), | ||||
|       DirichletLinOpF(DirichletFermOpF), | ||||
|       PeriodicLinOpD(PeriodicFermOpD), | ||||
|       PeriodicLinOpF(PeriodicFermOpF), | ||||
|       DirichletLinOpDagD(DirichletFermOpD), | ||||
|       DirichletLinOpDagF(DirichletFermOpF), | ||||
|       PeriodicLinOpDagD(PeriodicFermOpD), | ||||
|       PeriodicLinOpDagF(PeriodicFermOpF) | ||||
|   { | ||||
|     tol=1.0e-10; | ||||
|     tolinner=1.0e-6; | ||||
|     maxinnerit=1000; | ||||
|     maxouterit=10; | ||||
|     assert(PeriodicFermOpD.FermionGrid() == DirichletFermOpD.FermionGrid()); | ||||
|     assert(PeriodicFermOpF.FermionGrid() == DirichletFermOpF.FermionGrid()); | ||||
|   }; | ||||
|  | ||||
|   enum Domain { Omega=0, OmegaBar=1 }; | ||||
|  | ||||
|   void ImportGauge(const GaugeField &Umu) | ||||
|   { | ||||
|     // Single precision will update in the mixed prec CG | ||||
|     PeriodicFermOpD.ImportGauge(Umu); | ||||
|     GaugeField dUmu(Umu.Grid()); | ||||
|     dUmu=Umu; | ||||
|     //    DirchletBCs(dUmu); | ||||
|     DirichletFilter<GaugeField> Filter(Block); | ||||
|     Filter.applyFilter(dUmu); | ||||
|     DirichletFermOpD.ImportGauge(dUmu); | ||||
|   } | ||||
|  | ||||
| /* | ||||
|   void ProjectBoundaryBothDomains (FermionField &f,int sgn) | ||||
|   { | ||||
|     assert((sgn==1)||(sgn==-1)); | ||||
|     Real rsgn = sgn; | ||||
|  | ||||
|     Gamma::Algebra Gmu [] = { | ||||
|       Gamma::Algebra::GammaX, | ||||
|       Gamma::Algebra::GammaY, | ||||
|       Gamma::Algebra::GammaZ, | ||||
|       Gamma::Algebra::GammaT | ||||
|     }; | ||||
|  | ||||
|     GridBase *grid = f.Grid(); | ||||
|     LatticeInteger  coor(grid); | ||||
|     LatticeInteger  face(grid); | ||||
|     LatticeInteger  one(grid); one = 1; | ||||
|     LatticeInteger  zero(grid); zero = 0; | ||||
|     LatticeInteger nface(grid); nface=Zero(); | ||||
|      | ||||
|     FermionField projected(grid); projected=Zero(); | ||||
|     FermionField sp_proj  (grid); | ||||
|  | ||||
|     int dims = grid->Nd(); | ||||
|     int isDWF= (dims==Nd+1); | ||||
|     assert((dims==Nd)||(dims==Nd+1)); | ||||
|     Coordinate Global=grid->GlobalDimensions(); | ||||
|  | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|  | ||||
|       if ( Block[mu] <= Global[mu+isDWF] ) { | ||||
| 	// need to worry about DWF 5th dim first | ||||
| 	LatticeCoordinate(coor,mu+isDWF);  | ||||
|        | ||||
| 	face = where(mod(coor,Block[mu]) == Integer(0),one,zero ); | ||||
| 	nface = nface + face; | ||||
|  | ||||
| 	Gamma G(Gmu[mu]); | ||||
| 	// Lower face receives (1-gamma)/2 in normal forward hopping term | ||||
| 	sp_proj  = 0.5*(f-G*f*rsgn); | ||||
| 	projected= where(face,sp_proj,projected); | ||||
| 	//projected= where(face,f,projected); | ||||
|        | ||||
| 	face = where(mod(coor,Block[mu]) == Integer(Block[mu]-1) ,one,zero ); | ||||
| 	nface = nface + face; | ||||
|  | ||||
| 	// Upper face receives (1+gamma)/2 in normal backward hopping term | ||||
| 	sp_proj = 0.5*(f+G*f*rsgn); | ||||
| 	projected= where(face,sp_proj,projected); | ||||
| 	//projected= where(face,f,projected); | ||||
|       } | ||||
|        | ||||
|     } | ||||
|     // Initial Zero() where nface==0. | ||||
|     // Keep the spin projected faces where nface==1 | ||||
|     // Full spinor where nface>=2 | ||||
|     projected = where(nface>Integer(1),f,projected); | ||||
|     f=projected; | ||||
|   } | ||||
| */ | ||||
|   void ProjectBoundaryBothDomains (FermionField &f,int sgn) | ||||
|   { | ||||
|     assert((sgn==1)||(sgn==-1)); | ||||
|     Real rsgn = sgn; | ||||
|  | ||||
|     Gamma::Algebra Gmu [] = { | ||||
|       Gamma::Algebra::GammaX, | ||||
|       Gamma::Algebra::GammaY, | ||||
|       Gamma::Algebra::GammaZ, | ||||
|       Gamma::Algebra::GammaT | ||||
|     }; | ||||
|  | ||||
|     GridBase *grid = f.Grid(); | ||||
|     LatticeInteger  coor(grid); | ||||
|     LatticeInteger  face(grid); | ||||
|     LatticeInteger  one(grid);   one = 1; | ||||
|     LatticeInteger  zero(grid); zero = 0; | ||||
|     LatticeInteger  omega(grid); | ||||
|     LatticeInteger  omegabar(grid); | ||||
|     LatticeInteger  tmp(grid); | ||||
|  | ||||
|     omega=one;    Domains.ProjectDomain(omega,0); | ||||
|     omegabar=one; Domains.ProjectDomain(omegabar,1); | ||||
|      | ||||
|     LatticeInteger nface(grid); nface=Zero(); | ||||
|      | ||||
|     FermionField projected(grid); projected=Zero(); | ||||
|     FermionField sp_proj  (grid); | ||||
|  | ||||
|     int dims = grid->Nd(); | ||||
|     int isDWF= (dims==Nd+1); | ||||
|     assert((dims==Nd)||(dims==Nd+1)); | ||||
|     Coordinate Global=grid->GlobalDimensions(); | ||||
|  | ||||
|     for(int mmu=0;mmu<Nd;mmu++){ | ||||
|       Gamma G(Gmu[mmu]); | ||||
|  | ||||
|       // need to worry about DWF 5th dim first | ||||
|       int mu = mmu+isDWF; | ||||
|       if ( Block[mmu] && (Block[mmu] <= Global[mu]) ) { | ||||
|  | ||||
| 	// Lower face receives (1-gamma)/2 in normal forward hopping term | ||||
|  	tmp = Cshift(omegabar,mu,-1); | ||||
| 	tmp = tmp + omega; | ||||
| 	face = where(tmp == Integer(2),one,zero ); | ||||
|  | ||||
|  	tmp = Cshift(omega,mu,-1); | ||||
| 	tmp = tmp + omegabar; | ||||
| 	face = where(tmp == Integer(2),one,face ); | ||||
|  | ||||
| 	nface = nface + face; | ||||
|  | ||||
| 	sp_proj  = 0.5*(f-G*f*rsgn); | ||||
| 	projected= where(face,sp_proj,projected); | ||||
|  | ||||
| 	// Upper face receives (1+gamma)/2 in normal backward hopping term | ||||
|  	tmp = Cshift(omegabar,mu,1); | ||||
| 	tmp = tmp + omega; | ||||
| 	face = where(tmp == Integer(2),one,zero ); | ||||
|  | ||||
|  	tmp = Cshift(omega,mu,1); | ||||
| 	tmp = tmp + omegabar; | ||||
| 	face = where(tmp == Integer(2),one,face ); | ||||
|  | ||||
| 	nface = nface + face; | ||||
|  | ||||
| 	sp_proj = 0.5*(f+G*f*rsgn); | ||||
| 	projected= where(face,sp_proj,projected); | ||||
|       } | ||||
|        | ||||
|     } | ||||
|     // Initial Zero() where nface==0. | ||||
|     // Keep the spin projected faces where nface==1 | ||||
|     // Full spinor where nface>=2 | ||||
|     projected = where(nface>Integer(1),f,projected); | ||||
|     f=projected; | ||||
|   } | ||||
|  | ||||
|   void ProjectDomain(FermionField &f,int domain) | ||||
|   { | ||||
| /* | ||||
|     GridBase *grid = f.Grid(); | ||||
|     int dims = grid->Nd(); | ||||
|     int isDWF= (dims==Nd+1); | ||||
|     assert((dims==Nd)||(dims==Nd+1)); | ||||
|  | ||||
|     FermionField zz(grid); zz=Zero(); | ||||
|     LatticeInteger coor(grid); | ||||
|     LatticeInteger domaincb(grid); domaincb=Zero(); | ||||
|     for(int d=0;d<Nd;d++){ | ||||
|       LatticeCoordinate(coor,d+isDWF); | ||||
|       domaincb = domaincb + div(coor,Block[d]); | ||||
|     } | ||||
|     f = where(mod(domaincb,2)==Integer(domain),f,zz); | ||||
| */ | ||||
|     Domains.ProjectDomain(f,domain); | ||||
|  | ||||
|   }; | ||||
|   void ProjectOmegaBar   (FermionField &f) {ProjectDomain(f,OmegaBar);} | ||||
|   void ProjectOmega      (FermionField &f) {ProjectDomain(f,Omega);} | ||||
|   // See my notes(!). | ||||
|   // Notation: Following Luscher, we introduce projectors $\hPdb$ with both spinor and space structure | ||||
|   // projecting all spinor elements in $\Omega$ connected by $\Ddb$ to $\bar{\Omega}$, | ||||
|   void ProjectBoundaryBar(FermionField &f) | ||||
|   { | ||||
|     ProjectBoundaryBothDomains(f,1); | ||||
|     ProjectOmega(f); | ||||
|   } | ||||
|   // and $\hPd$ projecting all spinor elements in $\bar{\Omega}$ connected by $\Dd$ to $\Omega$. | ||||
|   void ProjectBoundary   (FermionField &f) | ||||
|   { | ||||
|     ProjectBoundaryBothDomains(f,1); | ||||
|     ProjectOmegaBar(f); | ||||
|     //    DumpSliceNorm("ProjectBoundary",f,f.Grid()->Nd()-1); | ||||
|   }; | ||||
|  | ||||
|   void dBoundary    (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmegaBar(tmp); | ||||
|     PeriodicFermOpD.M(tmp,out); | ||||
|     ProjectOmega(out); | ||||
|   }; | ||||
|   void dBoundaryDag (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmega(tmp); | ||||
|     PeriodicFermOpD.Mdag(tmp,out); | ||||
|     ProjectOmegaBar(out); | ||||
|   }; | ||||
|   void dBoundaryBar (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmega(tmp); | ||||
|     PeriodicFermOpD.M(tmp,out); | ||||
|     ProjectOmegaBar(out); | ||||
|   }; | ||||
|   void dBoundaryBarDag (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmegaBar(tmp); | ||||
|     PeriodicFermOpD.Mdag(tmp,out); | ||||
|     ProjectOmega(out); | ||||
|   }; | ||||
|   void dOmega       (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmega(tmp); | ||||
|     DirichletFermOpD.M(tmp,out); | ||||
|     ProjectOmega(out); | ||||
|   }; | ||||
|   void dOmegaBar    (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmegaBar(tmp); | ||||
|     DirichletFermOpD.M(tmp,out); | ||||
|     ProjectOmegaBar(out); | ||||
|   }; | ||||
|   void dOmegaDag       (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmega(tmp); | ||||
|     DirichletFermOpD.Mdag(tmp,out); | ||||
|     ProjectOmega(out); | ||||
|   }; | ||||
|   void dOmegaBarDag    (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmegaBar(tmp); | ||||
|     DirichletFermOpD.Mdag(tmp,out); | ||||
|     ProjectOmegaBar(out); | ||||
|   }; | ||||
|   void dOmegaInv   (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmega(tmp); | ||||
|     dOmegaInvAndOmegaBarInv(tmp,out); // Inefficient warning | ||||
|     ProjectOmega(out); | ||||
|   }; | ||||
|   void dOmegaBarInv(FermionField &in,FermionField &out) | ||||
|   {     | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmegaBar(tmp); | ||||
|     dOmegaInvAndOmegaBarInv(tmp,out); | ||||
|     ProjectOmegaBar(out); | ||||
|   }; | ||||
|   void dOmegaDagInv   (FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmega(tmp); | ||||
|     dOmegaDagInvAndOmegaBarDagInv(tmp,out); | ||||
|     ProjectOmega(out); | ||||
|   }; | ||||
|   void dOmegaBarDagInv(FermionField &in,FermionField &out) | ||||
|   {     | ||||
|     FermionField tmp(in); | ||||
|     ProjectOmegaBar(tmp); | ||||
|     dOmegaDagInvAndOmegaBarDagInv(tmp,out); | ||||
|     ProjectOmegaBar(out); | ||||
|   }; | ||||
|   void dOmegaInvAndOmegaBarInv(FermionField &in,FermionField &out) | ||||
|   { | ||||
|     MxPCG OmegaSolver(tol, | ||||
| 		      tolinner, | ||||
| 		      maxinnerit, | ||||
| 		      maxouterit, | ||||
| 		      DirichletFermOpF.FermionRedBlackGrid(), | ||||
| 		      DirichletFermOpF, | ||||
| 		      DirichletFermOpD, | ||||
| 		      DirichletLinOpF, | ||||
| 		      DirichletLinOpD); | ||||
|     SchurRedBlackDiagMooeeSolve<FermionField> PrecSolve(OmegaSolver); | ||||
|     PrecSolve(DirichletFermOpD,in,out); | ||||
|   }; | ||||
|   void dOmegaDagInvAndOmegaBarDagInv(FermionField &in,FermionField &out) | ||||
|   { | ||||
|     MxDagPCG OmegaDagSolver(tol, | ||||
| 			    tolinner, | ||||
| 			    maxinnerit, | ||||
| 			    maxouterit, | ||||
| 			    DirichletFermOpF.FermionRedBlackGrid(), | ||||
| 			    DirichletFermOpF, | ||||
| 			    DirichletFermOpD, | ||||
| 			    DirichletLinOpDagF, | ||||
| 			    DirichletLinOpDagD); | ||||
|     SchurRedBlackDiagMooeeDagSolve<FermionField> PrecSolve(OmegaDagSolver); | ||||
|     PrecSolve(DirichletFermOpD,in,out); | ||||
|   }; | ||||
|  | ||||
|   // Rdag = Pdbar - DdbarDag DomegabarDagInv  DdDag DomegaDagInv Pdbar  | ||||
|   void RDag(FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp1(PeriodicFermOpD.FermionGrid()); | ||||
|     FermionField tmp2(PeriodicFermOpD.FermionGrid()); | ||||
|     out = in; | ||||
|     ProjectBoundaryBar(out); | ||||
|     dOmegaDagInv(out,tmp1);    | ||||
|     dBoundaryDag(tmp1,tmp2);    | ||||
|     dOmegaBarDagInv(tmp2,tmp1); | ||||
|     dBoundaryBarDag(tmp1,tmp2);  | ||||
|     out = out - tmp2; | ||||
|   }; | ||||
|  | ||||
|   // R = Pdbar - Pdbar DomegaInv Dd DomegabarInv Ddbar | ||||
|   void R(FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp1(PeriodicFermOpD.FermionGrid()); | ||||
|     FermionField tmp2(PeriodicFermOpD.FermionGrid()); | ||||
|     out = in; | ||||
|     ProjectBoundaryBar(out); | ||||
|     dBoundaryBar(out,tmp1);  | ||||
|     dOmegaBarInv(tmp1,tmp2); | ||||
|     dBoundary(tmp2,tmp1);    | ||||
|     dOmegaInv(tmp1,tmp2);    | ||||
|     out = in - tmp2 ;        | ||||
|     ProjectBoundaryBar(out); | ||||
|     //    DumpSliceNorm("R",out,out.Grid()->Nd()-1); | ||||
|   }; | ||||
|    | ||||
|   // R = Pdbar - Pdbar Dinv Ddbar  | ||||
|   void RInv(FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp1(PeriodicFermOpD.FermionGrid()); | ||||
|     dBoundaryBar(in,out); | ||||
|     Dinverse(out,tmp1);   | ||||
|     out =in -tmp1;  | ||||
|     ProjectBoundaryBar(out); | ||||
|   }; | ||||
|   // R = Pdbar - DdbarDag DinvDag Pdbar  | ||||
|   void RDagInv(FermionField &in,FermionField &out) | ||||
|   { | ||||
|     FermionField tmp(PeriodicFermOpD.FermionGrid()); | ||||
|     FermionField Pin(PeriodicFermOpD.FermionGrid()); | ||||
|     Pin = in; ProjectBoundaryBar(Pin); | ||||
|     DinverseDag(Pin,out);   | ||||
|     dBoundaryBarDag(out,tmp); | ||||
|     out =Pin -tmp;  | ||||
|   }; | ||||
|   // Non-dirichlet inverter using red-black preconditioning | ||||
|   void Dinverse(FermionField &in,FermionField &out) | ||||
|   { | ||||
|     MxPCG DSolver(tol, | ||||
| 		  tolinner, | ||||
| 		  maxinnerit, | ||||
| 		  maxouterit, | ||||
| 		  PeriodicFermOpF.FermionRedBlackGrid(), | ||||
| 		  PeriodicFermOpF, | ||||
| 		  PeriodicFermOpD, | ||||
| 		  PeriodicLinOpF, | ||||
| 		  PeriodicLinOpD); | ||||
|     SchurRedBlackDiagMooeeSolve<FermionField> Solve(DSolver); | ||||
|     Solve(PeriodicFermOpD,in,out); | ||||
|   } | ||||
|   void DinverseDag(FermionField &in,FermionField &out) | ||||
|   { | ||||
|     MxDagPCG DdagSolver(tol, | ||||
| 			tolinner, | ||||
| 			maxinnerit, | ||||
| 			maxouterit, | ||||
| 			PeriodicFermOpF.FermionRedBlackGrid(), | ||||
| 			PeriodicFermOpF, | ||||
| 			PeriodicFermOpD, | ||||
| 			PeriodicLinOpDagF, | ||||
| 			PeriodicLinOpDagD); | ||||
|     SchurRedBlackDiagMooeeDagSolve<FermionField> Solve(DdagSolver); | ||||
|     Solve(PeriodicFermOpD,in,out); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
| @@ -68,12 +68,11 @@ public: | ||||
|   /*****************************************************/ | ||||
|   /* Compress includes precision change if mpi data is not same */ | ||||
|   /*****************************************************/ | ||||
|   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||
|     typedef decltype(coalescedRead(buf)) sobj; | ||||
|     sobj sp; | ||||
|     auto sin = coalescedRead(in); | ||||
|     projector::Proj(sp,sin,mu,dag); | ||||
|     coalescedWrite(buf,sp); | ||||
|   template<class _SiteHalfSpinor, class _SiteSpinor> | ||||
|   accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { | ||||
|     _SiteHalfSpinor tmp; | ||||
|     projector::Proj(tmp,in,mu,dag); | ||||
|     vstream(buf[o],tmp); | ||||
|   } | ||||
|  | ||||
|   /*****************************************************/ | ||||
| @@ -83,18 +82,13 @@ public: | ||||
| 				   const SiteHalfSpinor * __restrict__ vp0, | ||||
| 				   const SiteHalfSpinor * __restrict__ vp1, | ||||
| 				   Integer type,Integer o) const { | ||||
| #ifdef GRID_SIMT | ||||
|     exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); | ||||
| #else | ||||
|     SiteHalfSpinor tmp1; | ||||
|     SiteHalfSpinor tmp2; | ||||
|     exchange(tmp1,tmp2,vp0[o],vp1[o],type); | ||||
|     vstream(mp[2*o  ],tmp1); | ||||
|     vstream(mp[2*o+1],tmp2); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|  | ||||
|   /*****************************************************/ | ||||
|   /* Have a decompression step if mpi data is not same */ | ||||
|   /*****************************************************/ | ||||
| @@ -111,28 +105,6 @@ public: | ||||
| 					   const SiteSpinor * __restrict__ in, | ||||
| 					   Integer j,Integer k, Integer m,Integer type) const | ||||
|   { | ||||
| #ifdef GRID_SIMT | ||||
|     typedef SiteSpinor vobj; | ||||
|     typedef SiteHalfSpinor hvobj; | ||||
|     typedef decltype(coalescedRead(*in))    sobj; | ||||
|     typedef decltype(coalescedRead(*out0)) hsobj; | ||||
|  | ||||
|     unsigned int Nsimd = vobj::Nsimd(); | ||||
|     unsigned int mask = Nsimd >> (type + 1); | ||||
|     int lane = acceleratorSIMTlane(Nsimd); | ||||
|     int j0 = lane &(~mask); // inner coor zero | ||||
|     int j1 = lane |(mask) ; // inner coor one | ||||
|     const vobj *vp0 = &in[k]; | ||||
|     const vobj *vp1 = &in[m]; | ||||
|     const vobj *vp = (lane&mask) ? vp1:vp0; | ||||
|     auto sa = coalescedRead(*vp,j0); | ||||
|     auto sb = coalescedRead(*vp,j1); | ||||
|     hsobj psa, psb; | ||||
|     projector::Proj(psa,sa,mu,dag); | ||||
|     projector::Proj(psb,sb,mu,dag); | ||||
|     coalescedWrite(out0[j],psa); | ||||
|     coalescedWrite(out1[j],psb); | ||||
| #else | ||||
|     SiteHalfSpinor temp1, temp2; | ||||
|     SiteHalfSpinor temp3, temp4; | ||||
|     projector::Proj(temp1,in[k],mu,dag); | ||||
| @@ -140,7 +112,6 @@ public: | ||||
|     exchange(temp3,temp4,temp1,temp2,type); | ||||
|     vstream(out0[j],temp3); | ||||
|     vstream(out1[j],temp4); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   /*****************************************************/ | ||||
| @@ -150,7 +121,6 @@ public: | ||||
|  | ||||
| }; | ||||
|  | ||||
| #if 0 | ||||
| template<class _HCspinor,class _Hspinor,class _Spinor, class projector> | ||||
| class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, | ||||
| 				typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > | ||||
| @@ -179,23 +149,13 @@ public: | ||||
|   /*****************************************************/ | ||||
|   /* Compress includes precision change if mpi data is not same */ | ||||
|   /*****************************************************/ | ||||
|   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||
|     SiteHalfSpinor hsp; | ||||
|   template<class _SiteHalfSpinor, class _SiteSpinor> | ||||
|   accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { | ||||
|     _SiteHalfSpinor hsp; | ||||
|     SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; | ||||
|     projector::Proj(hsp,in,mu,dag); | ||||
|     precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); | ||||
|   } | ||||
|   accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const { | ||||
| #ifdef GRID_SIMT | ||||
|     typedef decltype(coalescedRead(buf)) sobj; | ||||
|     sobj sp; | ||||
|     auto sin = coalescedRead(in); | ||||
|     projector::Proj(sp,sin,mu,dag); | ||||
|     coalescedWrite(buf,sp); | ||||
| #else | ||||
|     projector::Proj(buf,in,mu,dag); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   /*****************************************************/ | ||||
|   /* Exchange includes precision change if mpi data is not same */ | ||||
| @@ -243,7 +203,6 @@ public: | ||||
|   accelerator_inline bool DecompressionStep(void) const { return true; } | ||||
|  | ||||
| }; | ||||
| #endif | ||||
|  | ||||
| #define DECLARE_PROJ(Projector,Compressor,spProj)			\ | ||||
|   class Projector {							\ | ||||
| @@ -294,8 +253,33 @@ public: | ||||
|   typedef typename Base::View_type View_type; | ||||
|   typedef typename Base::StencilVector StencilVector; | ||||
|  | ||||
|   void ZeroCountersi(void)  {  } | ||||
|   void Reporti(int calls)  {  } | ||||
|   double timer0; | ||||
|   double timer1; | ||||
|   double timer2; | ||||
|   double timer3; | ||||
|   double timer4; | ||||
|   double timer5; | ||||
|   double timer6; | ||||
|   uint64_t callsi; | ||||
|   void ZeroCountersi(void) | ||||
|   { | ||||
|     timer0=0; | ||||
|     timer1=0; | ||||
|     timer2=0; | ||||
|     timer3=0; | ||||
|     timer4=0; | ||||
|     timer5=0; | ||||
|     timer6=0; | ||||
|     callsi=0; | ||||
|   } | ||||
|   void Reporti(int calls) | ||||
|   { | ||||
|     if ( timer0 ) std::cout << GridLogMessage << " timer0 (HaloGatherOpt) " <<timer0/calls <<std::endl; | ||||
|     if ( timer1 ) std::cout << GridLogMessage << " timer1 (Communicate)   " <<timer1/calls <<std::endl; | ||||
|     if ( timer2 ) std::cout << GridLogMessage << " timer2 (CommsMerge )   " <<timer2/calls <<std::endl; | ||||
|     if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " <<timer3/calls <<std::endl; | ||||
|     if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl; | ||||
|   } | ||||
|  | ||||
|   std::vector<int> surface_list; | ||||
|  | ||||
| @@ -303,11 +287,9 @@ public: | ||||
| 		int npoints, | ||||
| 		int checkerboard, | ||||
| 		const std::vector<int> &directions, | ||||
| 		const std::vector<int> &distances, | ||||
| 		bool locally_periodic, | ||||
| 		Parameters p)   | ||||
|     : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,locally_periodic,p) | ||||
|   { | ||||
| 		const std::vector<int> &distances,Parameters p)   | ||||
|     : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)  | ||||
|   {  | ||||
|     ZeroCountersi(); | ||||
|     surface_list.resize(0); | ||||
|     this->same_node.resize(npoints); | ||||
| @@ -339,18 +321,26 @@ public: | ||||
|   { | ||||
|     std::vector<std::vector<CommsRequest_t> > reqs; | ||||
|     this->HaloExchangeOptGather(source,compress); | ||||
|     double t1=usecond(); | ||||
|     // Asynchronous MPI calls multidirectional, Isend etc... | ||||
|     // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. | ||||
|     this->Communicate(); | ||||
|     double t2=usecond(); timer1 += t2-t1; | ||||
|     this->CommsMerge(compress); | ||||
|     double t3=usecond(); timer2 += t3-t2; | ||||
|     this->CommsMergeSHM(compress); | ||||
|     double t4=usecond(); timer3 += t4-t3; | ||||
|   } | ||||
|    | ||||
|   template <class compressor> | ||||
|   void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)  | ||||
|   { | ||||
|     this->Prepare(); | ||||
|     double t0=usecond(); | ||||
|     this->HaloGatherOpt(source,compress); | ||||
|     double t1=usecond(); | ||||
|     timer0 += t1-t0; | ||||
|     callsi++; | ||||
|   } | ||||
|  | ||||
|   template <class compressor> | ||||
| @@ -362,9 +352,12 @@ public: | ||||
|     typedef typename compressor::SiteHalfSpinor     SiteHalfSpinor; | ||||
|     typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; | ||||
|  | ||||
|     this->mpi3synctime_g-=usecond(); | ||||
|     this->_grid->StencilBarrier(); | ||||
|     this->mpi3synctime_g+=usecond(); | ||||
|  | ||||
|     assert(source.Grid()==this->_grid); | ||||
|     this->halogtime-=usecond(); | ||||
|      | ||||
|     this->u_comm_offset=0; | ||||
|        | ||||
| @@ -400,6 +393,7 @@ public: | ||||
|     } | ||||
|     this->face_table_computed=1; | ||||
|     assert(this->u_comm_offset==this->_unified_buffer_size); | ||||
|     this->halogtime+=usecond(); | ||||
|     accelerator_barrier(); | ||||
|   } | ||||
|  | ||||
|   | ||||
| @@ -146,11 +146,8 @@ public: | ||||
|   void DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
|                     const FermionField &in, FermionField &out, int dag); | ||||
|  | ||||
|   void DhopInternalDirichletComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 				  const FermionField &in, FermionField &out, int dag); | ||||
|    | ||||
|   void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, | ||||
| 				   const FermionField &in, FermionField &out, int dag); | ||||
|                     const FermionField &in, FermionField &out, int dag); | ||||
|  | ||||
|   // Constructor | ||||
|   WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, | ||||
| @@ -160,10 +157,7 @@ public: | ||||
|  | ||||
|   // DoubleStore impl dependent | ||||
|   void ImportGauge(const GaugeField &_Umu); | ||||
|   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; | ||||
|   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; | ||||
|   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; | ||||
|    | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Data members require to support the functionality | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -165,14 +165,7 @@ public: | ||||
| 			       const FermionField &in,  | ||||
| 			       FermionField &out, | ||||
| 			       int dag); | ||||
|  | ||||
|   void DhopInternalDirichletComms(StencilImpl & st, | ||||
| 				  LebesgueOrder &lo, | ||||
| 				  DoubledGaugeField &U, | ||||
| 				  const FermionField &in,  | ||||
| 				  FermionField &out, | ||||
| 				  int dag); | ||||
|    | ||||
|      | ||||
|   // Constructors | ||||
|   WilsonFermion5D(GaugeField &_Umu, | ||||
| 		  GridCartesian         &FiveDimGrid, | ||||
| @@ -181,11 +174,19 @@ public: | ||||
| 		  GridRedBlackCartesian &FourDimRedBlackGrid, | ||||
| 		  double _M5,const ImplParams &p= ImplParams()); | ||||
|      | ||||
|   // Constructors | ||||
|   /* | ||||
|     WilsonFermion5D(int simd,  | ||||
|     GaugeField &_Umu, | ||||
|     GridCartesian         &FiveDimGrid, | ||||
|     GridRedBlackCartesian &FiveDimRedBlackGrid, | ||||
|     GridCartesian         &FourDimGrid, | ||||
|     double _M5,const ImplParams &p= ImplParams()); | ||||
|   */ | ||||
|      | ||||
|   // DoubleStore | ||||
|   void ImportGauge(const GaugeField &_Umu); | ||||
|   DoubledGaugeField &GetDoubledGaugeField(void){ return Umu; }; | ||||
|   DoubledGaugeField &GetDoubledGaugeFieldE(void){ return UmuEven; }; | ||||
|   DoubledGaugeField &GetDoubledGaugeFieldO(void){ return UmuOdd; }; | ||||
|      | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Data members require to support the functionality | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -243,17 +243,17 @@ typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffReal > WilsonImplR | ||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF;  // Float | ||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD;  // Double | ||||
|  | ||||
| //typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL;  // Real.. whichever prec | ||||
| //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH;  // Float | ||||
| //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF;  // Double | ||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL;  // Real.. whichever prec | ||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH;  // Float | ||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF;  // Double | ||||
|  | ||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec | ||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float | ||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double | ||||
|  | ||||
| //typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec | ||||
| //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float | ||||
| //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double | ||||
| typedef WilsonImpl<vComplex,  FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec | ||||
| typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float | ||||
| typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double | ||||
|   | ||||
| typedef WilsonImpl<vComplex,  AdjointRepresentation, CoeffReal > WilsonAdjImplR;   // Real.. whichever prec | ||||
| typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF;  // Float | ||||
|   | ||||
| @@ -39,7 +39,7 @@ NAMESPACE_BEGIN(Grid); | ||||
| class WilsonKernelsStatic {  | ||||
| public: | ||||
|   enum { OptGeneric, OptHandUnroll, OptInlineAsm }; | ||||
|   enum { CommsAndCompute, CommsThenCompute, CommsDirichlet }; | ||||
|   enum { CommsAndCompute, CommsThenCompute }; | ||||
|   static int Opt;   | ||||
|   static int Comms; | ||||
| }; | ||||
|   | ||||
| @@ -112,6 +112,7 @@ void CayleyFermion5D<Impl>::ImportUnphysicalFermion(const FermionField &input4d, | ||||
|   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); | ||||
|   imported5d=tmp; | ||||
| } | ||||
|  | ||||
| template<class Impl>   | ||||
| void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) | ||||
| { | ||||
| @@ -126,37 +127,6 @@ void CayleyFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &inpu | ||||
|   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); | ||||
|   Dminus(tmp,imported5d); | ||||
| } | ||||
| //////////////////////////////////////////////////// | ||||
| // Added for fourD pseudofermion det estimation | ||||
| //////////////////////////////////////////////////// | ||||
| template<class Impl>   | ||||
| void CayleyFermion5D<Impl>::ImportFourDimPseudoFermion(const FermionField &input4d,FermionField &imported5d) | ||||
| { | ||||
|   int Ls = this->Ls; | ||||
|   FermionField tmp(this->FermionGrid()); | ||||
|   conformable(imported5d.Grid(),this->FermionGrid()); | ||||
|   conformable(input4d.Grid()   ,this->GaugeGrid()); | ||||
|   tmp = Zero(); | ||||
|   InsertSlice(input4d, tmp, 0   , 0); | ||||
|   InsertSlice(input4d, tmp, Ls-1, 0); | ||||
|   axpby_ssp_pminus(tmp, 0., tmp, 1., tmp, 0, 0); | ||||
|   axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, Ls-1, Ls-1); | ||||
|   imported5d=tmp; | ||||
| } | ||||
| template<class Impl>   | ||||
| void CayleyFermion5D<Impl>::ExportFourDimPseudoFermion(const FermionField &solution5d,FermionField &exported4d) | ||||
| { | ||||
|   int Ls = this->Ls; | ||||
|   FermionField tmp(this->FermionGrid()); | ||||
|   tmp = solution5d; | ||||
|   conformable(solution5d.Grid(),this->FermionGrid()); | ||||
|   conformable(exported4d.Grid(),this->GaugeGrid()); | ||||
|   axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0); | ||||
|   axpby_ssp_pplus (tmp, 1., tmp       , 1., solution5d, 0, Ls-1); | ||||
|   ExtractSlice(exported4d, tmp, 0, 0); | ||||
| } | ||||
|  | ||||
| // Dminus | ||||
| template<class Impl>   | ||||
| void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi) | ||||
| { | ||||
| @@ -910,7 +880,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, | ||||
|   } | ||||
|  | ||||
|   std::vector<RealD> G_s(Ls,1.0); | ||||
|   RealD sign = 1; // sign flip for vector/tadpole | ||||
|   Integer sign = 1; // sign flip for vector/tadpole | ||||
|   if ( curr_type == Current::Axial ) { | ||||
|     for(int s=0;s<Ls/2;s++){ | ||||
|       G_s[s] = -1.0; | ||||
| @@ -931,8 +901,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, | ||||
|   for(int s=0;s<Ls;s++){ | ||||
|  | ||||
|     int sp = (s+1)%Ls; | ||||
|     //    int sr = Ls-1-s; | ||||
|     //    int srp= (sr+1)%Ls; | ||||
|     int sr = Ls-1-s; | ||||
|     int srp= (sr+1)%Ls; | ||||
|  | ||||
|     // Mobius parameters | ||||
|     auto b=this->bs[s]; | ||||
|   | ||||
| @@ -51,9 +51,9 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu, | ||||
|   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), | ||||
|   _FourDimGrid        (&FourDimGrid), | ||||
|   _FourDimRedBlackGrid(&FourDimRedBlackGrid), | ||||
|   Stencil    (_FiveDimGrid,npoint,Even,directions,displacements,p.locally_periodic,p), | ||||
|   StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p.locally_periodic,p), // source is Even | ||||
|   StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p.locally_periodic,p), // source is Odd | ||||
|   Stencil    (_FiveDimGrid,npoint,Even,directions,displacements,p), | ||||
|   StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even | ||||
|   StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd | ||||
|   M5(_M5), | ||||
|   Umu(_FourDimGrid), | ||||
|   UmuEven(_FourDimRedBlackGrid), | ||||
| @@ -361,21 +361,10 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, | ||||
|                                          const FermionField &in, FermionField &out,int dag) | ||||
| { | ||||
|   DhopTotalTime-=usecond(); | ||||
|  | ||||
|   assert(  (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute) | ||||
| 	 ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) | ||||
|          ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet) ); | ||||
|  | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) { | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) | ||||
|     DhopInternalOverlappedComms(st,lo,U,in,out,dag); | ||||
|   } | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute ) { | ||||
|   else  | ||||
|     DhopInternalSerialComms(st,lo,U,in,out,dag); | ||||
|   } | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet ) { | ||||
|     DhopInternalDirichletComms(st,lo,U,in,out,dag); | ||||
|   } | ||||
|    | ||||
|   DhopTotalTime+=usecond(); | ||||
| } | ||||
|  | ||||
| @@ -442,30 +431,6 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | ||||
|   DhopComputeTime2+=usecond(); | ||||
| } | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopInternalDirichletComms(StencilImpl & st, LebesgueOrder &lo, | ||||
| 						       DoubledGaugeField & U, | ||||
| 						       const FermionField &in, FermionField &out,int dag) | ||||
| { | ||||
|   Compressor compressor(dag); | ||||
|  | ||||
|   int LLs = in.Grid()->_rdimensions[0]; | ||||
|   int len =  U.Grid()->oSites(); | ||||
|        | ||||
|   ///////////////////////////// | ||||
|   // do the compute interior | ||||
|   ///////////////////////////// | ||||
|   int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know | ||||
|   DhopComputeTime-=usecond(); | ||||
|   if (dag == DaggerYes) { | ||||
|     Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); | ||||
|   } else { | ||||
|     Kernels::DhopKernel   (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); | ||||
|   } | ||||
|   accelerator_barrier(); | ||||
|   DhopComputeTime+=usecond(); | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, | ||||
|   | ||||
| @@ -47,9 +47,9 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, | ||||
|     Kernels(p), | ||||
|     _grid(&Fgrid), | ||||
|     _cbgrid(&Hgrid), | ||||
|     Stencil(&Fgrid, npoint, Even, directions, displacements,p.locally_periodic,p), | ||||
|     StencilEven(&Hgrid, npoint, Even, directions,displacements,p.locally_periodic,p),  // source is Even | ||||
|     StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p.locally_periodic,p),  // source is Odd | ||||
|     Stencil(&Fgrid, npoint, Even, directions, displacements,p), | ||||
|     StencilEven(&Hgrid, npoint, Even, directions,displacements,p),  // source is Even | ||||
|     StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p),  // source is Odd | ||||
|     mass(_mass), | ||||
|     Lebesgue(_grid), | ||||
|     LebesgueEvenOdd(_cbgrid), | ||||
| @@ -488,21 +488,12 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, | ||||
|                                        FermionField &out, int dag) | ||||
| { | ||||
|   DhopTotalTime-=usecond(); | ||||
|  | ||||
|   assert(  (WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute) | ||||
| 	 ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) | ||||
|          ||(WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet) ); | ||||
|  | ||||
|  | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) { | ||||
| #ifdef GRID_OMP | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) | ||||
|     DhopInternalOverlappedComms(st,lo,U,in,out,dag); | ||||
|   } | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute ) { | ||||
|   else | ||||
| #endif | ||||
|     DhopInternalSerial(st,lo,U,in,out,dag); | ||||
|   } | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsDirichlet ) { | ||||
|     DhopInternalDirichletComms(st,lo,U,in,out,dag); | ||||
|   } | ||||
|   DhopTotalTime+=usecond(); | ||||
| } | ||||
|  | ||||
| @@ -571,29 +562,6 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO | ||||
|   DhopComputeTime2+=usecond(); | ||||
| }; | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopInternalDirichletComms(StencilImpl &st, LebesgueOrder &lo, | ||||
| 						     DoubledGaugeField &U, | ||||
| 						     const FermionField &in, | ||||
| 						     FermionField &out, int dag) | ||||
| { | ||||
|   assert((dag == DaggerNo) || (dag == DaggerYes)); | ||||
|  | ||||
|   Compressor compressor(dag); | ||||
|   int len =  U.Grid()->oSites(); | ||||
|  | ||||
|   ///////////////////////////// | ||||
|   // do the compute interior | ||||
|   ///////////////////////////// | ||||
|   int Opt = WilsonKernelsStatic::Opt; | ||||
|   DhopComputeTime-=usecond(); | ||||
|   if (dag == DaggerYes) { | ||||
|     Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); | ||||
|   } else { | ||||
|     Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); | ||||
|   } | ||||
|   DhopComputeTime+=usecond(); | ||||
| }; | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, | ||||
|   | ||||
| @@ -73,17 +73,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -102,17 +102,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -131,17 +131,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -165,17 +165,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -194,17 +194,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -223,17 +223,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -280,17 +280,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -309,17 +309,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -338,17 +338,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| @@ -371,17 +371,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -400,17 +400,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -429,17 +429,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
| // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| // template<> void | ||||
| // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| // 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
| #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") | ||||
| template<> void | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -74,15 +74,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| // | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #define INTERIOR | ||||
| @@ -97,15 +97,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| // | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| @@ -121,15 +121,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| // | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|        | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| // XYZT vectorised, dag Kernel, single | ||||
| @@ -148,15 +148,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| // | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #define INTERIOR | ||||
| @@ -171,15 +171,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| // | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #undef INTERIOR | ||||
| @@ -194,15 +194,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| // | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| #undef MAYBEPERM | ||||
| #undef MULT_2SPIN | ||||
| @@ -228,14 +228,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeF | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #define INTERIOR | ||||
| @@ -249,14 +249,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGau | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #undef INTERIOR | ||||
| @@ -273,15 +273,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGau | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| // | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| // Ls vectorised, dag Kernel, single | ||||
| @@ -299,14 +299,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGau | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #define INTERIOR | ||||
| @@ -320,14 +320,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, Doubled | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #undef INTERIOR | ||||
| @@ -341,14 +341,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, Doubled | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #endif  // VEC 5D | ||||
|  | ||||
| @@ -392,14 +392,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #define INTERIOR | ||||
| @@ -413,14 +413,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #undef INTERIOR | ||||
| @@ -434,14 +434,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|        | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|        | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| // XYZT vectorised, dag Kernel, single | ||||
| @@ -459,14 +459,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #define INTERIOR | ||||
| @@ -480,14 +480,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #undef INTERIOR | ||||
| @@ -501,14 +501,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| //template<> void | ||||
| //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 						int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| #undef MAYBEPERM | ||||
| #undef MULT_2SPIN | ||||
| @@ -533,14 +533,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeF | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #define INTERIOR | ||||
| @@ -554,14 +554,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGau | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #undef INTERIOR | ||||
| @@ -577,14 +577,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGau | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| //							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, | ||||
| 							 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| 				     | ||||
| ///////////////////////////////////////////////////////////////// | ||||
| // Ls vectorised, dag Kernel, single | ||||
| @@ -602,14 +602,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGau | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #define INTERIOR | ||||
| @@ -623,14 +623,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, Doubled | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #undef INTERIOR_AND_EXTERIOR | ||||
| #undef INTERIOR | ||||
| @@ -645,14 +645,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, Doubled | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| //template<> void | ||||
| //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| //template<> void | ||||
| //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| //							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
| template<> void  | ||||
| WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, | ||||
| 							    int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) | ||||
| #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> | ||||
|  | ||||
| #endif  // VEC 5D | ||||
|  | ||||
|   | ||||
| @@ -0,0 +1 @@ | ||||
| ../CayleyFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../ContinuedFractionFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../DomainWallEOFAFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../MobiusEOFAFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../PartialFractionFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonCloverFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonKernelsInstantiationGparity.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonTMFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| #define IMPLEMENTATION GparityWilsonImplDF | ||||
| @@ -0,0 +1 @@ | ||||
| ../CayleyFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../ContinuedFractionFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../DomainWallEOFAFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../MobiusEOFAFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../PartialFractionFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonCloverFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonKernelsInstantiationGparity.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonTMFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| #define IMPLEMENTATION GparityWilsonImplFH | ||||
| @@ -0,0 +1 @@ | ||||
| ../CayleyFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../ContinuedFractionFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../DomainWallEOFAFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../MobiusEOFAFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../PartialFractionFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonCloverFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonFermionInstantiation.cc.master | ||||
| @@ -2,12 +2,14 @@ | ||||
| 
 | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
| 
 | ||||
| Source file: ./lib/qcd/hmc/DDHMC.h | ||||
| Source file: ./lib/qcd/action/fermion/WilsonKernels.cc | ||||
| 
 | ||||
| Copyright (C) 2021 | ||||
| Copyright (C) 2015, 2020 | ||||
| 
 | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Christopher Kelly | ||||
| Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Nils Meyer <nils.meyer@ur.de> Regensburg University | ||||
| 
 | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| @@ -26,27 +28,24 @@ with this program; if not, write to the Free Software Foundation, Inc., | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| 			   /*  END LEGAL */ | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/qcd/action/fermion/FermionCore.h> | ||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h> | ||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h> | ||||
| 
 | ||||
| #ifndef AVX512 | ||||
| #ifndef QPX | ||||
| #ifndef A64FX | ||||
| #ifndef A64FXFIXEDSIZE | ||||
| #include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h> | ||||
| #endif | ||||
| #endif | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| NAMESPACE_BEGIN(Grid); | ||||
| ////////////////////////////////////////////////////
 | ||||
| // DDHMC filter with sub-block size B[mu]
 | ||||
| ////////////////////////////////////////////////////
 | ||||
| 
 | ||||
| template<typename MomentaField> | ||||
| struct DDHMCFilter: public MomentumFilterBase<MomentaField> | ||||
| { | ||||
|   Coordinate Block; | ||||
|   int Width; | ||||
|    | ||||
|   DDHMCFilter(const Coordinate &_Block): Block(_Block) {} | ||||
| 
 | ||||
|   void applyFilter(MomentaField &P) const override | ||||
|   { | ||||
|     DomainDecomposition Domains(Block); | ||||
|     Domains.ProjectDDHMC(P); | ||||
|   } | ||||
| }; | ||||
| #include "impl.h" | ||||
| template class WilsonKernels<IMPLEMENTATION>; | ||||
| 
 | ||||
| NAMESPACE_END(Grid); | ||||
| 
 | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonTMFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| #define IMPLEMENTATION WilsonImplDF | ||||
| @@ -0,0 +1 @@ | ||||
| ../CayleyFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../ContinuedFractionFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../DomainWallEOFAFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../MobiusEOFAFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../PartialFractionFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonCloverFermionInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonFermion5DInstantiation.cc.master | ||||
| @@ -0,0 +1 @@ | ||||
| ../WilsonFermionInstantiation.cc.master | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user