Mirror of https://github.com/paboyle/Grid.git (synced 2025-10-31 12:04:33 +00:00)
			
		
		
		
	Merge pull request #457 from lehner/feature/gpt
Import GPT-related updates
This commit is contained in:
		| @@ -62,7 +62,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) | |||||||
|     basis_v.push_back(basis[k].View(AcceleratorWrite)); |     basis_v.push_back(basis[k].View(AcceleratorWrite)); | ||||||
|   } |   } | ||||||
|  |  | ||||||
| #if ( (!defined(GRID_CUDA)) ) | #if ( !(defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)) ) | ||||||
|   int max_threads = thread_max(); |   int max_threads = thread_max(); | ||||||
|   Vector < vobj > Bt(Nm * max_threads); |   Vector < vobj > Bt(Nm * max_threads); | ||||||
|   thread_region |   thread_region | ||||||
|   | |||||||
| @@ -469,15 +469,13 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) | |||||||
|   Coordinate fine_rdimensions = fine->_rdimensions; |   Coordinate fine_rdimensions = fine->_rdimensions; | ||||||
|   Coordinate coarse_rdimensions = coarse->_rdimensions; |   Coordinate coarse_rdimensions = coarse->_rdimensions; | ||||||
|  |  | ||||||
|   vobj zz = Zero(); |  | ||||||
|    |  | ||||||
|   accelerator_for(sc,coarse->oSites(),1,{ |   accelerator_for(sc,coarse->oSites(),1,{ | ||||||
|  |  | ||||||
|       // One thread per sub block |       // One thread per sub block | ||||||
|       Coordinate coor_c(_ndimension); |       Coordinate coor_c(_ndimension); | ||||||
|       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate |       Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions);  // Block coordinate | ||||||
|  |  | ||||||
|       vobj cd = zz; |       vobj cd = Zero(); | ||||||
|        |        | ||||||
|       for(int sb=0;sb<blockVol;sb++){ |       for(int sb=0;sb<blockVol;sb++){ | ||||||
|  |  | ||||||
|   | |||||||
| @@ -45,6 +45,7 @@ public: | |||||||
|   }; |   }; | ||||||
|   // Host only |   // Host only | ||||||
|   GridBase * getGrid(void) const { return _grid; }; |   GridBase * getGrid(void) const { return _grid; }; | ||||||
|  |   vobj* getHostPointer(void) const { return _odata; }; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -63,6 +63,8 @@ public: | |||||||
|   virtual void MooeeDag(const FermionField &in, FermionField &out) ; |   virtual void MooeeDag(const FermionField &in, FermionField &out) ; | ||||||
|   virtual void MooeeInv(const FermionField &in, FermionField &out) ; |   virtual void MooeeInv(const FermionField &in, FermionField &out) ; | ||||||
|   virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; |   virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; | ||||||
|  |   virtual void M(const FermionField &in, FermionField &out) ; | ||||||
|  |   virtual void Mdag(const FermionField &in, FermionField &out) ; | ||||||
|    |    | ||||||
| private: | private: | ||||||
|   RealD mu; // TwistedMass parameter |   RealD mu; // TwistedMass parameter | ||||||
|   | |||||||
| @@ -93,5 +93,25 @@ void WilsonTMFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &ou | |||||||
|   RealD b    = tm /sq; |   RealD b    = tm /sq; | ||||||
|   axpibg5x(out,in,a,b); |   axpibg5x(out,in,a,b); | ||||||
| } | } | ||||||
|  | template<class Impl> | ||||||
|  | void WilsonTMFermion<Impl>::M(const FermionField &in, FermionField &out) { | ||||||
|  |   out.Checkerboard() = in.Checkerboard(); | ||||||
|  |   this->Dhop(in, out, DaggerNo); | ||||||
|  |   FermionField tmp(out.Grid()); | ||||||
|  |   RealD a = 4.0+this->mass; | ||||||
|  |   RealD b = this->mu; | ||||||
|  |   axpibg5x(tmp,in,a,b); | ||||||
|  |   axpy(out, 1.0, tmp, out); | ||||||
|  | } | ||||||
|  | template<class Impl> | ||||||
|  | void WilsonTMFermion<Impl>::Mdag(const FermionField &in, FermionField &out) { | ||||||
|  |   out.Checkerboard() = in.Checkerboard(); | ||||||
|  |   this->Dhop(in, out, DaggerYes); | ||||||
|  |   FermionField tmp(out.Grid()); | ||||||
|  |   RealD a = 4.0+this->mass; | ||||||
|  |   RealD b = -this->mu; | ||||||
|  |   axpibg5x(tmp,in,a,b); | ||||||
|  |   axpy(out, 1.0, tmp, out); | ||||||
|  | } | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|   | |||||||
| @@ -706,7 +706,7 @@ public: | |||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl; |     //std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl; | ||||||
|   } |   } | ||||||
|   /// Introduce a block structure and switch off comms on boundaries |   /// Introduce a block structure and switch off comms on boundaries | ||||||
|   void DirichletBlock(const Coordinate &dirichlet_block) |   void DirichletBlock(const Coordinate &dirichlet_block) | ||||||
| @@ -761,7 +761,8 @@ public: | |||||||
| 		   int checkerboard, | 		   int checkerboard, | ||||||
| 		   const std::vector<int> &directions, | 		   const std::vector<int> &directions, | ||||||
| 		   const std::vector<int> &distances, | 		   const std::vector<int> &distances, | ||||||
| 		   Parameters p=Parameters()) | 		   Parameters p=Parameters(), | ||||||
|  | 		   bool preserve_shm=false) | ||||||
|   { |   { | ||||||
|     face_table_computed=0; |     face_table_computed=0; | ||||||
|     _grid    = grid; |     _grid    = grid; | ||||||
| @@ -855,6 +856,8 @@ public: | |||||||
|     ///////////////////////////////////////////////////////////////////////////////// |     ///////////////////////////////////////////////////////////////////////////////// | ||||||
|     const int Nsimd = grid->Nsimd(); |     const int Nsimd = grid->Nsimd(); | ||||||
|  |  | ||||||
|  |     // Allow for multiple stencils to exist simultaneously | ||||||
|  |     if (!preserve_shm) | ||||||
|       _grid->ShmBufferFreeAll(); |       _grid->ShmBufferFreeAll(); | ||||||
|  |  | ||||||
|     int maxl=2; |     int maxl=2; | ||||||
|   | |||||||
| @@ -288,11 +288,12 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { | |||||||
| #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... )	\ | #define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... )	\ | ||||||
|   theGridAccelerator->submit([&](cl::sycl::handler &cgh) {		\ |   theGridAccelerator->submit([&](cl::sycl::handler &cgh) {		\ | ||||||
|     unsigned long nt=acceleratorThreads();				\ |     unsigned long nt=acceleratorThreads();				\ | ||||||
|  |     if(nt < 8)nt=8;							\ | ||||||
|     unsigned long unum1 = num1;						\ |     unsigned long unum1 = num1;						\ | ||||||
|     unsigned long unum2 = num2;						\ |     unsigned long unum2 = num2;						\ | ||||||
|       if(nt < 8)nt=8;							\ |     unsigned long unum1_divisible_by_nt = ((unum1 + nt - 1) / nt) * nt;	\ | ||||||
|     cl::sycl::range<3> local {nt,1,nsimd};				\ |     cl::sycl::range<3> local {nt,1,nsimd};				\ | ||||||
|       cl::sycl::range<3> global{unum1,unum2,nsimd};			\ |     cl::sycl::range<3> global{unum1_divisible_by_nt,unum2,nsimd};	\ | ||||||
|     cgh.parallel_for(							\ |     cgh.parallel_for(							\ | ||||||
| 		     cl::sycl::nd_range<3>(global,local),		\ | 		     cl::sycl::nd_range<3>(global,local),		\ | ||||||
| 		     [=] (cl::sycl::nd_item<3> item) /*mutable*/	\ | 		     [=] (cl::sycl::nd_item<3> item) /*mutable*/	\ | ||||||
| @@ -301,7 +302,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { | |||||||
| 		       auto iter1    = item.get_global_id(0);		\ | 		       auto iter1    = item.get_global_id(0);		\ | ||||||
| 		       auto iter2    = item.get_global_id(1);		\ | 		       auto iter2    = item.get_global_id(1);		\ | ||||||
| 		       auto lane     = item.get_global_id(2);		\ | 		       auto lane     = item.get_global_id(2);		\ | ||||||
|       { __VA_ARGS__ };				      \ | 		       { if (iter1 < unum1){ __VA_ARGS__ } };		\ | ||||||
| 		     });						\ | 		     });						\ | ||||||
|   }); |   }); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user