mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Using GET rather than PUT gives a kinder barrier sequence for NVLINK-type access: once a
GET is done, I can use the data without a further barrier. This moves a barrier to a nicer place, overlapped with the DtoH DMA
This commit is contained in:
		@@ -501,6 +501,9 @@ public:
 | 
			
		||||
  void HaloGather(const Lattice<vobj> &source,compressor &compress)
 | 
			
		||||
  {
 | 
			
		||||
    //    accelerator_barrier();
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    // I will overwrite my send buffers
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    _grid->StencilBarrier();// Synch shared memory on a single node
 | 
			
		||||
 | 
			
		||||
    assert(source.Grid()==_grid);
 | 
			
		||||
@@ -514,7 +517,12 @@ public:
 | 
			
		||||
      HaloGatherDir(source,compress,point,face_idx);
 | 
			
		||||
    }
 | 
			
		||||
    accelerator_barrier(); // All my local gathers are complete
 | 
			
		||||
    //    _grid->StencilBarrier();// Synch shared memory on a single node
 | 
			
		||||
#ifdef NVLINK_GET
 | 
			
		||||
    #warning "NVLINK_GET"
 | 
			
		||||
    _grid->StencilBarrier(); // He can now get my local gather, I can get his
 | 
			
		||||
    // Synch shared memory on a single node; could use an asynchronous barrier here and defer the check
 | 
			
		||||
    // Or issue barrier AFTER the DMA is running
 | 
			
		||||
#endif    
 | 
			
		||||
    face_table_computed=1;
 | 
			
		||||
    assert(u_comm_offset==_unified_buffer_size);
 | 
			
		||||
  }
 | 
			
		||||
@@ -553,6 +561,7 @@ public:
 | 
			
		||||
	  coalescedWrite(to[j] ,coalescedRead(from [j]));
 | 
			
		||||
      });
 | 
			
		||||
      acceleratorFenceComputeStream();
 | 
			
		||||
      // Also fenced in WilsonKernels
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user