mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-30 19:44:32 +00:00 
			
		
		
		
	Partial dirichlet support
This commit is contained in:
		| @@ -3,26 +3,108 @@ | |||||||
|  |  | ||||||
| NAMESPACE_BEGIN(Grid); | NAMESPACE_BEGIN(Grid); | ||||||
|  |  | ||||||
| template<class vobj> | class SimpleStencilParams{ | ||||||
| accelerator_inline void exchangeSIMT(vobj &mp0,vobj &mp1,const vobj &vp0,const vobj &vp1,Integer type) | public: | ||||||
| { |   Coordinate dirichlet; | ||||||
|     typedef decltype(coalescedRead(mp0)) sobj; |   int partialDirichlet; | ||||||
|     unsigned int Nsimd = vobj::Nsimd(); |   SimpleStencilParams() { partialDirichlet = 0; }; | ||||||
|     unsigned int mask = Nsimd >> (type + 1); | }; | ||||||
|     int lane = acceleratorSIMTlane(Nsimd); |  | ||||||
|     int j0 = lane &(~mask); // inner coor zero |  | ||||||
|     int j1 = lane |(mask) ; // inner coor one |  | ||||||
|     const vobj *vpa = &vp0; |  | ||||||
|     const vobj *vpb = &vp1; |  | ||||||
|     const vobj *vp = (lane&mask) ? (vpb) : (vpa); |  | ||||||
|     auto sa = coalescedRead(vp[0],j0); |  | ||||||
|     auto sb = coalescedRead(vp[0],j1); |  | ||||||
|     coalescedWrite(mp0,sa); |  | ||||||
|     coalescedWrite(mp1,sb); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class vobj> |  | ||||||
| class SimpleCompressor { | // Compressors will inherit buffer management policies | ||||||
|  | // Standard comms buffer management | ||||||
|  | class FaceGatherSimple | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |   static int PartialCompressionFactor(GridBase *grid) {return 1;}; | ||||||
|  |   // Decompress is after merge so ok | ||||||
|  |   template<class vobj,class cobj,class compressor>  | ||||||
|  |   static void Gather_plane_simple (commVector<std::pair<int,int> >& table, | ||||||
|  | 				   const Lattice<vobj> &rhs, | ||||||
|  | 				   cobj *buffer, | ||||||
|  | 				   compressor &compress, | ||||||
|  | 				   int off,int so,int partial) | ||||||
|  |   { | ||||||
|  |     int num=table.size(); | ||||||
|  |     std::pair<int,int> *table_v = & table[0]; | ||||||
|  |      | ||||||
|  |     auto rhs_v = rhs.View(AcceleratorRead); | ||||||
|  |     accelerator_forNB( i,num, vobj::Nsimd(), { | ||||||
|  | 	compress.Compress(buffer[off+table_v[i].first],rhs_v[so+table_v[i].second]); | ||||||
|  |     }); | ||||||
|  |     rhs_v.ViewClose(); | ||||||
|  |   } | ||||||
|  |   template<class vobj,class cobj,class compressor> | ||||||
|  |   static void Gather_plane_exchange(commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs, | ||||||
|  | 				    Vector<cobj *> pointers,int dimension,int plane,int cbmask, | ||||||
|  | 				    compressor &compress,int type,int partial) | ||||||
|  |   { | ||||||
|  |     assert( (table.size()&0x1)==0); | ||||||
|  |     int num=table.size()/2; | ||||||
|  |     int so  = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane | ||||||
|  |      | ||||||
|  |     auto rhs_v = rhs.View(AcceleratorRead); | ||||||
|  |     auto p0=&pointers[0][0]; | ||||||
|  |     auto p1=&pointers[1][0]; | ||||||
|  |     auto tp=&table[0]; | ||||||
|  |     auto rhs_p = &rhs_v[0]; | ||||||
|  |     accelerator_forNB(j, num, vobj::Nsimd(), { | ||||||
|  | 	compress.CompressExchange(p0[j],p1[j], | ||||||
|  | 				  rhs_p[so+tp[2*j  ].second], | ||||||
|  | 				  rhs_p[so+tp[2*j+1].second], | ||||||
|  | 				  type); | ||||||
|  |     }); | ||||||
|  |     rhs_v.ViewClose(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   template<class decompressor,class Decompression> | ||||||
|  |   static void DecompressFace(decompressor decompress,Decompression &dd) | ||||||
|  |   { | ||||||
|  |     auto kp = dd.kernel_p; | ||||||
|  |     auto mp = dd.mpi_p; | ||||||
|  |     accelerator_forNB(o,dd.buffer_size,1,{ | ||||||
|  |       decompress.Decompress(kp[o],mp[o]); | ||||||
|  |     }); | ||||||
|  |   } | ||||||
|  |   template<class decompressor,class Merger> | ||||||
|  |   static void MergeFace(decompressor decompress,Merger &mm) | ||||||
|  |   { | ||||||
|  |     auto mp = &mm.mpointer[0]; | ||||||
|  |     auto vp0= &mm.vpointers[0][0]; | ||||||
|  |     auto vp1= &mm.vpointers[1][0]; | ||||||
|  |     auto type= mm.type; | ||||||
|  |     accelerator_forNB(o,mm.buffer_size/2,vobj::Nsimd(),{ | ||||||
|  | 	decompress.Exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); | ||||||
|  |     }); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | //////////////////////////////////// | ||||||
|  | // Wilson compressor will add alternate policies for Dirichlet | ||||||
|  | // and possibly partial Dirichlet for DWF | ||||||
|  | //////////////////////////////////// | ||||||
|  | /* | ||||||
|  | class FaceGatherDirichlet | ||||||
|  | { | ||||||
|  |   // If it's dirichlet we don't assemble comms buffers | ||||||
|  |   // | ||||||
|  |   // Rely on zeroes in gauge field to drive the correct result | ||||||
|  |   // NAN propagation: field will locally wrap, so fermion should NOT contain NAN and just permute | ||||||
|  |   template<class vobj,class cobj,class compressor> | ||||||
|  |   static void Gather_plane_simple (commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so){}; | ||||||
|  |   template<class vobj,class cobj,class compressor> | ||||||
|  |   static void Gather_plane_exchange(commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs, | ||||||
|  | 				   Vector<cobj *> pointers,int dimension,int plane,int cbmask, | ||||||
|  | 				   compressor &compress,int type) {} | ||||||
|  |   template<class decompressor,class Merger> | ||||||
|  |   static void Merge(decompressor decompress,Merge &mm)  {  } | ||||||
|  |   template<class decompressor,class Decompression> | ||||||
|  |   static void Decompress(decompressor decompress,Decompression &dd) {} | ||||||
|  | }; | ||||||
|  | */ | ||||||
|  |  | ||||||
|  | template<class vobj,class FaceGather> | ||||||
|  | class SimpleCompressorGather : public FaceGather { | ||||||
| public: | public: | ||||||
|   void Point(int) {}; |   void Point(int) {}; | ||||||
|   accelerator_inline int  CommDatumSize(void) const { return sizeof(vobj); } |   accelerator_inline int  CommDatumSize(void) const { return sizeof(vobj); } | ||||||
| @@ -30,20 +112,19 @@ public: | |||||||
|   accelerator_inline void Compress(vobj &buf,const vobj &in) const { |   accelerator_inline void Compress(vobj &buf,const vobj &in) const { | ||||||
|     coalescedWrite(buf,coalescedRead(in)); |     coalescedWrite(buf,coalescedRead(in)); | ||||||
|   } |   } | ||||||
|   accelerator_inline void Exchange(vobj *mp,vobj *vp0,vobj *vp1,Integer type,Integer o) const { |   accelerator_inline void Exchange(vobj &mp0,vobj &mp1,vobj &vp0,vobj &vp1,Integer type) const { | ||||||
| #ifdef GRID_SIMT | #ifdef GRID_SIMT | ||||||
|     exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); |     exchangeSIMT(mp0,mp1,vp0,vp1,type); | ||||||
| #else | #else | ||||||
|     exchange(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type); |     exchange(mp0,mp1,vp0,vp1,type); | ||||||
| #endif | #endif | ||||||
|   } |   } | ||||||
|   accelerator_inline void Decompress(vobj *out,vobj *in, int o) const { assert(0); } |   accelerator_inline void Decompress(vobj &out,vobj &in) const {  }; | ||||||
|   accelerator_inline void CompressExchange(vobj *out0,vobj *out1,const vobj *in, |   accelerator_inline void CompressExchange(vobj &out0,vobj &out1,const vobj &in0,const vobj &in1,int type) const { | ||||||
| 					   int j,int k, int m,int type) const { |  | ||||||
| #ifdef GRID_SIMT | #ifdef GRID_SIMT | ||||||
|     exchangeSIMT(out0[j],out1[j],in[k],in[m],type); |     exchangeSIMT(out0,out1,in0,in1,type); | ||||||
| #else | #else | ||||||
|     exchange(out0[j],out1[j],in[k],in[m],type); |     exchange(out0,out1,in0,in1,type); | ||||||
| #endif | #endif | ||||||
|   } |   } | ||||||
|   // For cshift. Cshift should drop compressor coupling altogether  |   // For cshift. Cshift should drop compressor coupling altogether  | ||||||
| @@ -52,11 +133,18 @@ public: | |||||||
|     return arg; |     return arg; | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
| class SimpleStencilParams{ |  | ||||||
| public: | // Standard compressor never needs dirichlet. | ||||||
|   Coordinate dirichlet; | // | ||||||
|   SimpleStencilParams() {}; | // Get away with a local periodic wrap and rely on dirac operator to use a zero gauge link as it is faster | ||||||
| }; | // | ||||||
|  | // Compressors that inherit Dirichlet and Non-dirichlet behaviour. | ||||||
|  | // | ||||||
|  | // Currently run-time behaviour through StencilParameters parameters, p.dirichlet | ||||||
|  | // combined with the FaceGatherSimple behaviour | ||||||
|  |  | ||||||
|  | template <class vobj> using SimpleCompressor = SimpleCompressorGather<vobj,FaceGatherSimple>; | ||||||
|  | //template <class vobj> using SimpleCompressorDirichlet = SimpleCompressorGather<vobj,FaceGatherDirichlet>; | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user