Mirror of https://github.com/paboyle/Grid.git (synced 2025-11-04 14:04:32 +00:00)
			
		
		
		
	Merge branch 'feature/bgq-asm' of https://github.com/paboyle/Grid into feature/bgq-asm
This commit is contained in:
		@@ -678,12 +678,9 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    calls++;
 | 
					    calls++;
 | 
				
			||||||
    Mergers.resize(0);
 | 
					    Mergers.resize(0);
 | 
				
			||||||
    Packets.resize(0);
 | 
					    Packets.resize(0);
 | 
				
			||||||
    _grid->StencilBarrier();
 | 
					 | 
				
			||||||
    HaloGather(source,compress);
 | 
					    HaloGather(source,compress);
 | 
				
			||||||
    this->CommunicateBegin(reqs);
 | 
					    this->CommunicateBegin(reqs);
 | 
				
			||||||
    _grid->StencilBarrier();
 | 
					 | 
				
			||||||
    this->CommunicateComplete(reqs);
 | 
					    this->CommunicateComplete(reqs);
 | 
				
			||||||
    _grid->StencilBarrier();
 | 
					 | 
				
			||||||
    CommsMerge(); // spins
 | 
					    CommsMerge(); // spins
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -171,6 +171,8 @@ namespace QCD {
 | 
				
			|||||||
  class WilsonStencil : public CartesianStencil<vobj,cobj> {
 | 
					  class WilsonStencil : public CartesianStencil<vobj,cobj> {
 | 
				
			||||||
  public:
 | 
					  public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    WilsonStencil(GridBase *grid,
 | 
					    WilsonStencil(GridBase *grid,
 | 
				
			||||||
		int npoints,
 | 
							int npoints,
 | 
				
			||||||
		int checkerboard,
 | 
							int checkerboard,
 | 
				
			||||||
@@ -178,79 +180,71 @@ namespace QCD {
 | 
				
			|||||||
		const std::vector<int> &distances)  : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances) 
 | 
							const std::vector<int> &distances)  : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances) 
 | 
				
			||||||
      {    };
 | 
					      {    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template < class compressor>
 | 
					 | 
				
			||||||
    std::thread HaloExchangeOptBegin(const Lattice<vobj> &source,compressor &compress) {
 | 
					 | 
				
			||||||
      this->Mergers.resize(0); 
 | 
					 | 
				
			||||||
      this->Packets.resize(0);
 | 
					 | 
				
			||||||
      this->HaloGatherOpt(source,compress);
 | 
					 | 
				
			||||||
      return std::thread([&] { this->Communicate(); });
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template < class compressor>
 | 
					    template < class compressor>
 | 
				
			||||||
    void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress) 
 | 
					    void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress) 
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      auto thr = this->HaloExchangeOptBegin(source,compress);
 | 
					      std::vector<std::vector<CommsRequest_t> > reqs;
 | 
				
			||||||
      this->HaloExchangeOptComplete(thr);
 | 
					      this->Mergers.resize(0); 
 | 
				
			||||||
 | 
					      this->Packets.resize(0);
 | 
				
			||||||
 | 
					      this->HaloGatherOpt(source,compress);
 | 
				
			||||||
 | 
					      this->CommunicateBegin(reqs);
 | 
				
			||||||
 | 
					      this->CommunicateComplete(reqs);
 | 
				
			||||||
 | 
					      this->CommsMerge(); // spins
 | 
				
			||||||
 | 
					      this->calls++;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void HaloExchangeOptComplete(std::thread &thr) 
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
	this->CommsMerge(); // spins
 | 
					 | 
				
			||||||
	this->jointime-=usecond();
 | 
					 | 
				
			||||||
	thr.join();
 | 
					 | 
				
			||||||
	this->jointime+=usecond();
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template < class compressor>
 | 
					    template < class compressor>
 | 
				
			||||||
    void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
 | 
					    void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
	// conformable(source._grid,_grid);
 | 
					      int face_idx=0;
 | 
				
			||||||
	assert(source._grid==this->_grid);
 | 
					 | 
				
			||||||
	this->halogtime-=usecond();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	assert (this->comm_buf.size() == this->_unified_buffer_size );
 | 
					      // conformable(source._grid,_grid);
 | 
				
			||||||
	this->u_comm_offset=0;
 | 
					      assert(source._grid==this->_grid);
 | 
				
			||||||
 | 
					      this->halogtime-=usecond();
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      this->u_comm_offset=0;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      int dag = compress.dag;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      WilsonXpCompressor<cobj,vobj> XpCompress; 
 | 
				
			||||||
 | 
					      WilsonYpCompressor<cobj,vobj> YpCompress; 
 | 
				
			||||||
 | 
					      WilsonZpCompressor<cobj,vobj> ZpCompress; 
 | 
				
			||||||
 | 
					      WilsonTpCompressor<cobj,vobj> TpCompress;
 | 
				
			||||||
 | 
					      WilsonXmCompressor<cobj,vobj> XmCompress;
 | 
				
			||||||
 | 
					      WilsonYmCompressor<cobj,vobj> YmCompress;
 | 
				
			||||||
 | 
					      WilsonZmCompressor<cobj,vobj> ZmCompress;
 | 
				
			||||||
 | 
					      WilsonTmCompressor<cobj,vobj> TmCompress;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int dag = compress.dag;
 | 
					      // Gather all comms buffers
 | 
				
			||||||
	static std::vector<int> dirs(Nd*2);
 | 
					      //    for(int point = 0 ; point < _npoints; point++) {
 | 
				
			||||||
	for(int mu=0;mu<Nd;mu++){
 | 
					      //      compress.Point(point);
 | 
				
			||||||
	  if ( dag ) {
 | 
					      //      HaloGatherDir(source,compress,point,face_idx);
 | 
				
			||||||
	    dirs[mu]  =mu;
 | 
					      //    }
 | 
				
			||||||
	    dirs[mu+4]=mu+Nd;
 | 
					      if ( dag ) { 
 | 
				
			||||||
	  } else { 
 | 
						this->HaloGatherDir(source,XpCompress,Xp,face_idx);
 | 
				
			||||||
	    dirs[mu]  =mu+Nd;
 | 
						this->HaloGatherDir(source,YpCompress,Yp,face_idx);
 | 
				
			||||||
	    dirs[mu+Nd]=mu;
 | 
						this->HaloGatherDir(source,ZpCompress,Zp,face_idx);
 | 
				
			||||||
	  }
 | 
						this->HaloGatherDir(source,TpCompress,Tp,face_idx);
 | 
				
			||||||
	}
 | 
						this->HaloGatherDir(source,XmCompress,Xm,face_idx);
 | 
				
			||||||
 | 
						this->HaloGatherDir(source,YmCompress,Ym,face_idx);
 | 
				
			||||||
 | 
						this->HaloGatherDir(source,ZmCompress,Zm,face_idx);
 | 
				
			||||||
	WilsonXpCompressor<cobj,vobj> XpCompress;
 | 
						this->HaloGatherDir(source,TmCompress,Tm,face_idx);
 | 
				
			||||||
	this->HaloGatherDir(source,XpCompress,dirs[0]);
 | 
					      } else {
 | 
				
			||||||
 | 
						this->HaloGatherDir(source,XmCompress,Xp,face_idx);
 | 
				
			||||||
	WilsonYpCompressor<cobj,vobj> YpCompress;
 | 
						this->HaloGatherDir(source,YmCompress,Yp,face_idx);
 | 
				
			||||||
	this->HaloGatherDir(source,YpCompress,dirs[1]);
 | 
						this->HaloGatherDir(source,ZmCompress,Zp,face_idx);
 | 
				
			||||||
 | 
						this->HaloGatherDir(source,TmCompress,Tp,face_idx);
 | 
				
			||||||
	WilsonZpCompressor<cobj,vobj> ZpCompress;
 | 
						this->HaloGatherDir(source,XpCompress,Xm,face_idx);
 | 
				
			||||||
	this->HaloGatherDir(source,ZpCompress,dirs[2]);
 | 
						this->HaloGatherDir(source,YpCompress,Ym,face_idx);
 | 
				
			||||||
 | 
						this->HaloGatherDir(source,ZpCompress,Zm,face_idx);
 | 
				
			||||||
	WilsonTpCompressor<cobj,vobj> TpCompress;
 | 
						this->HaloGatherDir(source,TpCompress,Tm,face_idx);
 | 
				
			||||||
	this->HaloGatherDir(source,TpCompress,dirs[3]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	WilsonXmCompressor<cobj,vobj> XmCompress;
 | 
					 | 
				
			||||||
	this->HaloGatherDir(source,XmCompress,dirs[4]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	WilsonYmCompressor<cobj,vobj> YmCompress;
 | 
					 | 
				
			||||||
	this->HaloGatherDir(source,YmCompress,dirs[5]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	WilsonZmCompressor<cobj,vobj> ZmCompress;
 | 
					 | 
				
			||||||
	this->HaloGatherDir(source,ZmCompress,dirs[6]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	WilsonTmCompressor<cobj,vobj> TmCompress;
 | 
					 | 
				
			||||||
	this->HaloGatherDir(source,TmCompress,dirs[7]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	assert(this->u_comm_offset==this->_unified_buffer_size);
 | 
					 | 
				
			||||||
	this->halogtime+=usecond();
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					      this->face_table_computed=1;
 | 
				
			||||||
 | 
					      assert(this->u_comm_offset==this->_unified_buffer_size);
 | 
				
			||||||
 | 
					      this->halogtime+=usecond();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -403,7 +403,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
 | 
				
			|||||||
  int LLs = in._grid->_rdimensions[0];
 | 
					  int LLs = in._grid->_rdimensions[0];
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  DhopCommTime-=usecond();
 | 
					  DhopCommTime-=usecond();
 | 
				
			||||||
  st.HaloExchange(in,compressor);
 | 
					  st.HaloExchangeOpt(in,compressor);
 | 
				
			||||||
  DhopCommTime+=usecond();
 | 
					  DhopCommTime+=usecond();
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  DhopComputeTime-=usecond();
 | 
					  DhopComputeTime-=usecond();
 | 
				
			||||||
@@ -447,13 +447,11 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
 | 
				
			|||||||
#else
 | 
					#else
 | 
				
			||||||
#pragma omp parallel 
 | 
					#pragma omp parallel 
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      for(int i=0;i<10;i++){
 | 
					 | 
				
			||||||
      int me, myoff,mywork;
 | 
					 | 
				
			||||||
      int len = U._grid->oSites();
 | 
					      int len = U._grid->oSites();
 | 
				
			||||||
 | 
					      int me, myoff,mywork;
 | 
				
			||||||
      GridThread::GetWorkBarrier(len,me, mywork,myoff);
 | 
					      GridThread::GetWorkBarrier(len,me, mywork,myoff);
 | 
				
			||||||
      int sF = LLs * myoff;
 | 
					      int sF = LLs * myoff;
 | 
				
			||||||
      Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out);
 | 
					      Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out);
 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user