mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 12:04:33 +00:00 
			
		
		
		
	Merge branch 'feature/hdcr' into develop
This commit is contained in:
		| @@ -46,16 +46,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| template<class vobj,class CComplex> | template<class vobj,class CComplex> | ||||||
| inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner1, | inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner, | ||||||
| 				    Lattice<CComplex> &CoarseInner2, | 				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask, | ||||||
| 				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask1, |  | ||||||
| 				    const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask2, |  | ||||||
| 				    const Lattice<vobj> &fineX, | 				    const Lattice<vobj> &fineX, | ||||||
| 				    const Lattice<vobj> &fineY) | 				    const Lattice<vobj> &fineY) | ||||||
| { | { | ||||||
|   typedef decltype(innerProduct(vobj(),vobj())) dotp; |   typedef decltype(innerProduct(vobj(),vobj())) dotp; | ||||||
|  |  | ||||||
|   GridBase *coarse(CoarseInner1.Grid()); |   GridBase *coarse(CoarseInner.Grid()); | ||||||
|   GridBase *fine  (fineX.Grid()); |   GridBase *fine  (fineX.Grid()); | ||||||
|  |  | ||||||
|   Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); |   Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); | ||||||
| @@ -64,12 +62,8 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner1, | |||||||
|   // Multiply could be fused with innerProduct |   // Multiply could be fused with innerProduct | ||||||
|   // Single block sum kernel could do both masks. |   // Single block sum kernel could do both masks. | ||||||
|   fine_inner = localInnerProduct(fineX,fineY); |   fine_inner = localInnerProduct(fineX,fineY); | ||||||
|  |   mult(fine_inner_msk, fine_inner,FineMask); | ||||||
|   mult(fine_inner_msk, fine_inner,FineMask1); |   blockSum(CoarseInner,fine_inner_msk); | ||||||
|   blockSum(CoarseInner1,fine_inner_msk); |  | ||||||
|  |  | ||||||
|   mult(fine_inner_msk, fine_inner,FineMask2); |  | ||||||
|   blockSum(CoarseInner2,fine_inner_msk); |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -794,7 +788,7 @@ public: | |||||||
|  |  | ||||||
|     Lattice<iScalar<vInteger> > coor (FineGrid); |     Lattice<iScalar<vInteger> > coor (FineGrid); | ||||||
|     Lattice<iScalar<vInteger> > bcoor(FineGrid); |     Lattice<iScalar<vInteger> > bcoor(FineGrid); | ||||||
|     Lattice<iScalar<vInteger> > bcb  (FineGrid); |     Lattice<iScalar<vInteger> > bcb  (FineGrid); bcb = Zero(); | ||||||
|  |  | ||||||
|     CoarseVector iProj(Grid());  |     CoarseVector iProj(Grid());  | ||||||
|     CoarseVector oProj(Grid());  |     CoarseVector oProj(Grid());  | ||||||
| @@ -868,7 +862,7 @@ public: | |||||||
| 	 | 	 | ||||||
| 	  for(int j=0;j<nbasis;j++){ | 	  for(int j=0;j<nbasis;j++){ | ||||||
| 	     | 	     | ||||||
| 	    blockMaskedInnerProduct(iZProj,oZProj,imask,omask,Subspace.subspace[j],Mphi); | 	    blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi); | ||||||
| 	     | 	     | ||||||
| 	    auto iZProj_v = iZProj.View() ; | 	    auto iZProj_v = iZProj.View() ; | ||||||
| 	    auto oZProj_v = oZProj.View() ; | 	    auto oZProj_v = oZProj.View() ; | ||||||
| @@ -876,6 +870,8 @@ public: | |||||||
| 	    auto A_self  = A[self_stencil].View(); | 	    auto A_self  = A[self_stencil].View(); | ||||||
|  |  | ||||||
| 	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); | 	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); | ||||||
|  | 	    //      if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });} | ||||||
|  | 	    //	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); }); | ||||||
|  |  | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
| @@ -886,9 +882,8 @@ public: | |||||||
|       /////////////////////////////////////////// |       /////////////////////////////////////////// | ||||||
|       { |       { | ||||||
| 	mult(tmp,phi,evenmask);  linop.Op(tmp,Mphie); | 	mult(tmp,phi,evenmask);  linop.Op(tmp,Mphie); | ||||||
| 	mult(tmp,phi,oddmask );   linop.Op(tmp,Mphio); | 	mult(tmp,phi,oddmask );  linop.Op(tmp,Mphio); | ||||||
|  |  | ||||||
| 	//	tmp = Mphie*evenmask + Mphio*oddmask; |  | ||||||
| 	{ | 	{ | ||||||
| 	  auto tmp_      = tmp.View(); | 	  auto tmp_      = tmp.View(); | ||||||
| 	  auto evenmask_ = evenmask.View(); | 	  auto evenmask_ = evenmask.View(); | ||||||
| @@ -904,15 +899,17 @@ public: | |||||||
|  |  | ||||||
| 	auto SelfProj_ = SelfProj.View(); | 	auto SelfProj_ = SelfProj.View(); | ||||||
| 	auto A_self  = A[self_stencil].View(); | 	auto A_self  = A[self_stencil].View(); | ||||||
|  |  | ||||||
| 	accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ | 	accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ | ||||||
| 	  for(int j=0;j<nbasis;j++){ | 	  for(int j=0;j<nbasis;j++){ | ||||||
| 	    coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j)); | 	    coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j)); | ||||||
| 	  } | 	  } | ||||||
| 	}); | 	}); | ||||||
|  |  | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     if(hermitian) { |     if(hermitian) { | ||||||
|       std::cout << GridLogMessage << " ForceHermitian "<<std::endl; |       std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl; | ||||||
|       ForceHermitian(); |       ForceHermitian(); | ||||||
|     } |     } | ||||||
|       // AssertHermitian(); |       // AssertHermitian(); | ||||||
|   | |||||||
| @@ -336,7 +336,7 @@ public: | |||||||
|     }; |     }; | ||||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|     // Left  handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta  -->  ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta |     // Left  handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta  -->  ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta | ||||||
|     // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi |     // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta  -->  ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi | ||||||
|     /////////////////////////////////////////////////////////////////////////////////////////////////// |     /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|     template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ; |     template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ; | ||||||
|     template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ; |     template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ; | ||||||
|   | |||||||
| @@ -59,16 +59,15 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i | |||||||
| { | { | ||||||
|   typedef decltype(basis[0].View()) View; |   typedef decltype(basis[0].View()) View; | ||||||
|   auto tmp_v = basis[0].View(); |   auto tmp_v = basis[0].View(); | ||||||
|   std::vector<View> basis_v(basis.size(),tmp_v); |   Vector<View> basis_v(basis.size(),tmp_v); | ||||||
|   typedef typename Field::vector_object vobj; |   typedef typename Field::vector_object vobj; | ||||||
|   GridBase* grid = basis[0].Grid(); |   GridBase* grid = basis[0].Grid(); | ||||||
|        |  | ||||||
|   for(int k=0;k<basis.size();k++){ |   for(int k=0;k<basis.size();k++){ | ||||||
|     basis_v[k] = basis[k].View(); |     basis_v[k] = basis[k].View(); | ||||||
|   } |   } | ||||||
|  | #if 0 | ||||||
|   std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private |   std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private | ||||||
|  |  | ||||||
|   thread_region |   thread_region | ||||||
|   { |   { | ||||||
|     vobj* B = Bt.data() + Nm * thread_num(); |     vobj* B = Bt.data() + Nm * thread_num(); | ||||||
| @@ -86,24 +85,89 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i | |||||||
|       } |       } | ||||||
|     }); |     }); | ||||||
|   } |   } | ||||||
|  | #else | ||||||
|  |  | ||||||
|  |   int nrot = j1-j0; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   uint64_t oSites   =grid->oSites(); | ||||||
|  |   uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead | ||||||
|  |  | ||||||
|  |   //  printf("BasisRotate %d %d nrot %d siteBlock %d\n",j0,j1,nrot,siteBlock); | ||||||
|  |  | ||||||
|  |   Vector <vobj> Bt(siteBlock * nrot);  | ||||||
|  |   auto Bp=&Bt[0]; | ||||||
|  |  | ||||||
|  |   // GPU readable copy of Eigen matrix | ||||||
|  |   Vector<double> Qt_jv(Nm*Nm); | ||||||
|  |   double *Qt_p = & Qt_jv[0]; | ||||||
|  |   for(int k=0;k<Nm;++k){ | ||||||
|  |     for(int j=0;j<Nm;++j){ | ||||||
|  |       Qt_p[j*Nm+k]=Qt(j,k); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Block the loop to keep storage footprint down | ||||||
|  |   vobj zz=Zero(); | ||||||
|  |   for(uint64_t s=0;s<oSites;s+=siteBlock){ | ||||||
|  |  | ||||||
|  |     // remaining work in this block | ||||||
|  |     int ssites=MIN(siteBlock,oSites-s); | ||||||
|  |  | ||||||
|  |     // zero out the accumulators | ||||||
|  |     accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{ | ||||||
|  | 	auto z=coalescedRead(zz); | ||||||
|  | 	coalescedWrite(Bp[ss],z); | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{ | ||||||
|  | 	 | ||||||
|  |       int j =sj%nrot; | ||||||
|  |       int jj  =j0+j; | ||||||
|  |       int ss =sj/nrot; | ||||||
|  |       int sss=ss+s; | ||||||
|  |  | ||||||
|  |       for(int k=k0; k<k1; ++k){ | ||||||
|  | 	auto tmp = coalescedRead(Bp[ss*nrot+j]); | ||||||
|  | 	coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss])); | ||||||
|  |       } | ||||||
|  |     }); | ||||||
|  |  | ||||||
|  |     accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{ | ||||||
|  |       int j =sj%nrot; | ||||||
|  |       int jj  =j0+j; | ||||||
|  |       int ss =sj/nrot; | ||||||
|  |       int sss=ss+s; | ||||||
|  |       coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); | ||||||
|  |     }); | ||||||
|  |   } | ||||||
|  | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| // Extract a single rotated vector | // Extract a single rotated vector | ||||||
| template<class Field> | template<class Field> | ||||||
| void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)  | void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)  | ||||||
| { | { | ||||||
|  |   typedef decltype(basis[0].View()) View; | ||||||
|   typedef typename Field::vector_object vobj; |   typedef typename Field::vector_object vobj; | ||||||
|   GridBase* grid = basis[0].Grid(); |   GridBase* grid = basis[0].Grid(); | ||||||
|  |  | ||||||
|   result.Checkerboard() = basis[0].Checkerboard(); |   result.Checkerboard() = basis[0].Checkerboard(); | ||||||
|   auto result_v=result.View(); |   auto result_v=result.View(); | ||||||
|   thread_for(ss, grid->oSites(),{ |   Vector<View> basis_v(basis.size(),result_v); | ||||||
|     vobj B = Zero(); |   for(int k=0;k<basis.size();k++){ | ||||||
|  |     basis_v[k] = basis[k].View(); | ||||||
|  |   } | ||||||
|  |   vobj zz=Zero(); | ||||||
|  |   Vector<double> Qt_jv(Nm); | ||||||
|  |   double * Qt_j = & Qt_jv[0]; | ||||||
|  |   for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k); | ||||||
|  |   accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{ | ||||||
|  |     auto B=coalescedRead(zz); | ||||||
|     for(int k=k0; k<k1; ++k){ |     for(int k=k0; k<k1; ++k){ | ||||||
|       auto basis_k = basis[k].View(); |       B +=Qt_j[k] * coalescedRead(basis_v[k][ss]); | ||||||
|       B +=Qt(j,k) * basis_k[ss]; |  | ||||||
|     } |     } | ||||||
|     result_v[ss] = B; |     coalescedWrite(result_v[ss], B); | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -303,7 +367,7 @@ public: | |||||||
| 			       RealD _eresid, // resid in lmdue deficit  | 			       RealD _eresid, // resid in lmdue deficit  | ||||||
| 			       int _MaxIter, // Max iterations | 			       int _MaxIter, // Max iterations | ||||||
| 			       RealD _betastp=0.0, // if beta(k) < betastp: converged | 			       RealD _betastp=0.0, // if beta(k) < betastp: converged | ||||||
| 			       int _MinRestart=1, int _orth_period = 1, | 			       int _MinRestart=0, int _orth_period = 1, | ||||||
| 			       IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : | 			       IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : | ||||||
|     SimpleTester(HermOp),  _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(SimpleTester), |     SimpleTester(HermOp),  _PolyOp(PolyOp),      _HermOp(HermOp), _Tester(SimpleTester), | ||||||
|     Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm), |     Nstop(_Nstop)  ,      Nk(_Nk),      Nm(_Nm), | ||||||
| @@ -377,14 +441,17 @@ until convergence | |||||||
|     { |     { | ||||||
|       auto src_n = src; |       auto src_n = src; | ||||||
|       auto tmp = src; |       auto tmp = src; | ||||||
|  |       std::cout << GridLogIRL << " IRL source norm " << norm2(src) << std::endl; | ||||||
|       const int _MAX_ITER_IRL_MEVAPP_ = 50; |       const int _MAX_ITER_IRL_MEVAPP_ = 50; | ||||||
|       for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { |       for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) { | ||||||
| 	normalise(src_n); | 	normalise(src_n); | ||||||
| 	_HermOp(src_n,tmp); | 	_HermOp(src_n,tmp); | ||||||
|  | 	//	std::cout << GridLogMessage<< tmp<<std::endl; exit(0); | ||||||
|  | 	//	std::cout << GridLogIRL << " _HermOp " << norm2(tmp) << std::endl; | ||||||
| 	RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. | 	RealD vnum = real(innerProduct(src_n,tmp)); // HermOp. | ||||||
| 	RealD vden = norm2(src_n); | 	RealD vden = norm2(src_n); | ||||||
| 	RealD na = vnum/vden; | 	RealD na = vnum/vden; | ||||||
| 	if (fabs(evalMaxApprox/na - 1.0) < 0.05) | 	if (fabs(evalMaxApprox/na - 1.0) < 0.0001) | ||||||
| 	  i=_MAX_ITER_IRL_MEVAPP_; | 	  i=_MAX_ITER_IRL_MEVAPP_; | ||||||
| 	evalMaxApprox = na; | 	evalMaxApprox = na; | ||||||
| 	std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl; | 	std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl; | ||||||
|   | |||||||
| @@ -60,5 +60,53 @@ public: | |||||||
|   }      |   }      | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | template<class Field> class HPDSolver { | ||||||
|  | private: | ||||||
|  |   LinearOperatorBase<Field> & _Matrix; | ||||||
|  |   OperatorFunction<Field> & _HermitianSolver; | ||||||
|  |   LinearFunction<Field>   & _Guess; | ||||||
|  | public: | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////////////////// | ||||||
|  |   // Seed out with _Guess, then apply _HermitianSolver to _Matrix directly; no MdagM operator is built in this class | ||||||
|  |   ///////////////////////////////////////////////////// | ||||||
|  |  HPDSolver(LinearOperatorBase<Field> &Matrix, | ||||||
|  | 	   OperatorFunction<Field> &HermitianSolver, | ||||||
|  | 	   LinearFunction<Field> &Guess)  | ||||||
|  |    :  _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};  | ||||||
|  |  | ||||||
|  |   void operator() (const Field &in, Field &out){ | ||||||
|  |   | ||||||
|  |     _Guess(in,out); | ||||||
|  |     _HermitianSolver(_Matrix,in,out);  // solver acts on _Matrix itself; in is passed through unmodified (no Mdag applied here) | ||||||
|  |  | ||||||
|  |   }      | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class Field> class MdagMSolver { | ||||||
|  | private: | ||||||
|  |   SparseMatrixBase<Field> & _Matrix; | ||||||
|  |   OperatorFunction<Field> & _HermitianSolver; | ||||||
|  |   LinearFunction<Field>   & _Guess; | ||||||
|  | public: | ||||||
|  |  | ||||||
|  |   ///////////////////////////////////////////////////// | ||||||
|  |   // Wrap _Matrix in an MdagMLinearOperator, seed out with _Guess, then run _HermitianSolver on the wrapped operator | ||||||
|  |   ///////////////////////////////////////////////////// | ||||||
|  |  MdagMSolver(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver, | ||||||
|  | 	     LinearFunction<Field> &Guess)  | ||||||
|  |    :  _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};  | ||||||
|  |  | ||||||
|  |   void operator() (const Field &in, Field &out){ | ||||||
|  |   | ||||||
|  |     MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix); | ||||||
|  |     _Guess(in,out); | ||||||
|  |  | ||||||
|  |     _HermitianSolver(MdagMOp,in,out);  // NOTE(review): in is passed as given, no Mdag is applied to it here; presumably solves Mdag M out = in - confirm | ||||||
|  |  | ||||||
|  |   }      | ||||||
|  | }; | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -118,7 +118,7 @@ public: | |||||||
|  |  | ||||||
|     } |     } | ||||||
|     GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl; |     GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl; | ||||||
|     assert(0); |     //    assert(0); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   RealD GCRnStep(const Field &src, Field &psi,RealD rsq){ |   RealD GCRnStep(const Field &src, Field &psi,RealD rsq){ | ||||||
|   | |||||||
| @@ -47,20 +47,19 @@ public: | |||||||
|   // Give Lattice access |   // Give Lattice access | ||||||
|   template<class object> friend class Lattice; |   template<class object> friend class Lattice; | ||||||
|  |  | ||||||
|   GridBase(const Coordinate & processor_grid) : CartesianCommunicator(processor_grid) {};  |   GridBase(const Coordinate & processor_grid) : CartesianCommunicator(processor_grid) { LocallyPeriodic=0;};  | ||||||
|  |  | ||||||
|   GridBase(const Coordinate & processor_grid, |   GridBase(const Coordinate & processor_grid, | ||||||
| 	   const CartesianCommunicator &parent, | 	   const CartesianCommunicator &parent, | ||||||
| 	   int &split_rank)  | 	   int &split_rank)  | ||||||
|     : CartesianCommunicator(processor_grid,parent,split_rank) {}; |     : CartesianCommunicator(processor_grid,parent,split_rank) {LocallyPeriodic=0;}; | ||||||
|  |  | ||||||
|   GridBase(const Coordinate & processor_grid, |   GridBase(const Coordinate & processor_grid, | ||||||
| 	   const CartesianCommunicator &parent)  | 	   const CartesianCommunicator &parent)  | ||||||
|     : CartesianCommunicator(processor_grid,parent,dummy) {}; |     : CartesianCommunicator(processor_grid,parent,dummy) {LocallyPeriodic=0;}; | ||||||
|  |  | ||||||
|   virtual ~GridBase() = default; |   virtual ~GridBase() = default; | ||||||
|  |  | ||||||
|  |  | ||||||
|   // Physics Grid information. |   // Physics Grid information. | ||||||
|   Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes. |   Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||||
|   Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal |   Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal | ||||||
| @@ -80,7 +79,8 @@ public: | |||||||
|   Coordinate _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] |   Coordinate _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] | ||||||
|   Coordinate _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1 |   Coordinate _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1 | ||||||
|  |  | ||||||
|     bool _isCheckerBoarded;  |   bool _isCheckerBoarded;  | ||||||
|  |   int        LocallyPeriodic; | ||||||
|  |  | ||||||
| public: | public: | ||||||
|  |  | ||||||
|   | |||||||
| @@ -173,6 +173,7 @@ public: | |||||||
|   /////////////////////////////////////////////////// |   /////////////////////////////////////////////////// | ||||||
|   typedef typename vobj::scalar_type scalar_type; |   typedef typename vobj::scalar_type scalar_type; | ||||||
|   typedef typename vobj::vector_type vector_type; |   typedef typename vobj::vector_type vector_type; | ||||||
|  |   typedef typename vobj::scalar_object scalar_object; | ||||||
|   typedef vobj vector_object; |   typedef vobj vector_object; | ||||||
|  |  | ||||||
| private: | private: | ||||||
|   | |||||||
| @@ -156,7 +156,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){ | |||||||
| // Peek a scalar object from the SIMD array | // Peek a scalar object from the SIMD array | ||||||
| ////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////// | ||||||
| template<class vobj,class sobj> | template<class vobj,class sobj> | ||||||
| void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){ | accelerator_inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){ | ||||||
|          |          | ||||||
|   GridBase *grid = l.Grid(); |   GridBase *grid = l.Grid(); | ||||||
|  |  | ||||||
| @@ -185,7 +185,7 @@ void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){ | |||||||
| }; | }; | ||||||
|  |  | ||||||
| template<class vobj,class sobj> | template<class vobj,class sobj> | ||||||
| void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){ | accelerator_inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){ | ||||||
|  |  | ||||||
|   GridBase *grid=l.Grid(); |   GridBase *grid=l.Grid(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -439,6 +439,67 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out) | |||||||
|   }); |   }); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | template<class vobj> | ||||||
|  | void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate FromLowerLeft, Coordinate ToLowerLeft, Coordinate RegionSize) | ||||||
|  | { | ||||||
|  |   typedef typename vobj::scalar_object sobj; | ||||||
|  |   typedef typename vobj::scalar_type scalar_type; | ||||||
|  |   typedef typename vobj::vector_type vector_type; | ||||||
|  |  | ||||||
|  |   static const int words=sizeof(vobj)/sizeof(vector_type); | ||||||
|  |  | ||||||
|  |   GridBase *Fg = From.Grid(); | ||||||
|  |   GridBase *Tg = To.Grid(); | ||||||
|  |   assert(!Fg->_isCheckerBoarded); | ||||||
|  |   assert(!Tg->_isCheckerBoarded); | ||||||
|  |   int Nsimd = Fg->Nsimd(); | ||||||
|  |   int nF = Fg->_ndimension; | ||||||
|  |   int nT = Tg->_ndimension; | ||||||
|  |   int nd = nF; | ||||||
|  |   assert(nF == nT); | ||||||
|  |  | ||||||
|  |   for(int d=0;d<nd;d++){ | ||||||
|  |     assert(Fg->_processors[d]  == Tg->_processors[d]); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Matching _processors[d] in every dimension (asserted above) should guarantee that the operations are local | ||||||
|  |   Coordinate ldf = Fg->_ldimensions; | ||||||
|  |   Coordinate rdf = Fg->_rdimensions; | ||||||
|  |   Coordinate isf = Fg->_istride; | ||||||
|  |   Coordinate osf = Fg->_ostride; | ||||||
|  |   Coordinate rdt = Tg->_rdimensions; | ||||||
|  |   Coordinate ist = Tg->_istride; | ||||||
|  |   Coordinate ost = Tg->_ostride; | ||||||
|  |   auto t_v = To.View(); | ||||||
|  |   auto f_v = From.View(); | ||||||
|  |   accelerator_for(idx,Fg->lSites(),1,{ | ||||||
|  |     sobj s; | ||||||
|  |     Coordinate Fcoor(nd); | ||||||
|  |     Coordinate Tcoor(nd); | ||||||
|  |     Lexicographic::CoorFromIndex(Fcoor,idx,ldf); | ||||||
|  |     int in_region=1; | ||||||
|  |     for(int d=0;d<nd;d++){ | ||||||
|  |       if ( (Fcoor[d] < FromLowerLeft[d]) || (Fcoor[d]>=FromLowerLeft[d]+RegionSize[d]) ){  | ||||||
|  | 	in_region=0; | ||||||
|  |       } | ||||||
|  |       Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d]; | ||||||
|  |     } | ||||||
|  |     if (in_region) { | ||||||
|  |       Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]); | ||||||
|  |       Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]); | ||||||
|  |       Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]); | ||||||
|  |       Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]); | ||||||
|  |       scalar_type * fp = (scalar_type *)&f_v[odx_f]; | ||||||
|  |       scalar_type * tp = (scalar_type *)&t_v[odx_t]; | ||||||
|  |       for(int w=0;w<words;w++){ | ||||||
|  | 	tp[idx_t+w*Nsimd] = fp[idx_f+w*Nsimd];  // FIXME: with an RRII layout this scalar_type pun will not work | ||||||
|  |       } | ||||||
|  |       //      peekLocalSite(s,From,Fcoor); | ||||||
|  |       //      pokeLocalSite(s,To  ,Tcoor); | ||||||
|  |     } | ||||||
|  |   }); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> | template<class vobj> | ||||||
| void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog) | void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user