mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 05:24:32 +00:00 
			
		
		
		
	Bug fixes for cases that physics code couldn't hit but latent
and discovered on KNL (long vector, y SIMD dir) and checker dir set to y. Remove the assertions on these code paths now they are tested.
This commit is contained in:
		@@ -1,5 +1,4 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
    /*************************************************************************************
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -53,8 +52,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen
 | 
				
			|||||||
    cbmask = 0x3;
 | 
					    cbmask = 0x3;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  int so  = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
					  int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
					  int e1=rhs._grid->_slice_nblock[dimension];
 | 
				
			||||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
					  int e2=rhs._grid->_slice_block[dimension];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -74,7 +72,7 @@ PARALLEL_NESTED_LOOP2
 | 
				
			|||||||
     for(int n=0;n<e1;n++){
 | 
					     for(int n=0;n<e1;n++){
 | 
				
			||||||
       for(int b=0;b<e2;b++){
 | 
					       for(int b=0;b<e2;b++){
 | 
				
			||||||
	 int o  = n*stride;
 | 
						 int o  = n*stride;
 | 
				
			||||||
	 int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
 | 
						 int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
 | 
				
			||||||
	 if ( ocb &cbmask ) {
 | 
						 if ( ocb &cbmask ) {
 | 
				
			||||||
	   table.push_back(std::pair<int,int> (bo++,o+b));
 | 
						   table.push_back(std::pair<int,int> (bo++,o+b));
 | 
				
			||||||
	 }
 | 
						 }
 | 
				
			||||||
@@ -105,29 +103,30 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
 | 
				
			|||||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
					  int e1=rhs._grid->_slice_nblock[dimension];
 | 
				
			||||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
					  int e2=rhs._grid->_slice_block[dimension];
 | 
				
			||||||
  int n1=rhs._grid->_slice_stride[dimension];
 | 
					  int n1=rhs._grid->_slice_stride[dimension];
 | 
				
			||||||
  int n2=rhs._grid->_slice_block[dimension];
 | 
					 | 
				
			||||||
  if ( cbmask ==0x3){
 | 
					  if ( cbmask ==0x3){
 | 
				
			||||||
PARALLEL_NESTED_LOOP2
 | 
					PARALLEL_NESTED_LOOP2
 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					    for(int n=0;n<e1;n++){
 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					      for(int b=0;b<e2;b++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int o      =   n*n1;
 | 
						int o      =   n*n1;
 | 
				
			||||||
	int offset = b+n*n2;
 | 
						int offset = b+n*e2;
 | 
				
			||||||
	cobj temp =compress(rhs._odata[so+o+b]);
 | 
						cobj temp =compress(rhs._odata[so+o+b]);
 | 
				
			||||||
 | 
					 | 
				
			||||||
	extract<cobj>(temp,pointers,offset);
 | 
						extract<cobj>(temp,pointers,offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  } else { 
 | 
					  } else { 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert(0); //Fixme think this is buggy
 | 
					    // Case of SIMD split AND checker dim cannot currently be hit, except in 
 | 
				
			||||||
 | 
					    // Test_cshift_red_black code.
 | 
				
			||||||
 | 
					    std::cout << " Dense packed buffer WARNING " <<std::endl;
 | 
				
			||||||
 | 
					PARALLEL_NESTED_LOOP2
 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					    for(int n=0;n<e1;n++){
 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					      for(int b=0;b<e2;b++){
 | 
				
			||||||
	int o=n*rhs._grid->_slice_stride[dimension];
 | 
					
 | 
				
			||||||
 | 
						int o=n*n1;
 | 
				
			||||||
	int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
 | 
						int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
 | 
				
			||||||
	int offset = b+n*rhs._grid->_slice_block[dimension];
 | 
						int offset = b+n*e2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ( ocb & cbmask ) {
 | 
						if ( ocb & cbmask ) {
 | 
				
			||||||
	  cobj temp =compress(rhs._odata[so+o+b]);
 | 
						  cobj temp =compress(rhs._odata[so+o+b]);
 | 
				
			||||||
@@ -171,6 +170,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
  int e1=rhs._grid->_slice_nblock[dimension];
 | 
					  int e1=rhs._grid->_slice_nblock[dimension];
 | 
				
			||||||
  int e2=rhs._grid->_slice_block[dimension];
 | 
					  int e2=rhs._grid->_slice_block[dimension];
 | 
				
			||||||
 | 
					  int stride=rhs._grid->_slice_stride[dimension];
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  if ( cbmask ==0x3 ) {
 | 
					  if ( cbmask ==0x3 ) {
 | 
				
			||||||
PARALLEL_NESTED_LOOP2
 | 
					PARALLEL_NESTED_LOOP2
 | 
				
			||||||
@@ -182,17 +182,22 @@ PARALLEL_NESTED_LOOP2
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  } else { 
 | 
					  } else { 
 | 
				
			||||||
 | 
					    std::vector<std::pair<int,int> > table;
 | 
				
			||||||
    int bo=0;
 | 
					    int bo=0;
 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					    for(int n=0;n<e1;n++){
 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					      for(int b=0;b<e2;b++){
 | 
				
			||||||
	int o   =n*rhs._grid->_slice_stride[dimension];
 | 
						int o   =n*rhs._grid->_slice_stride[dimension];
 | 
				
			||||||
	int bo  =n*rhs._grid->_slice_block[dimension];
 | 
					 | 
				
			||||||
	int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
 | 
						int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
 | 
				
			||||||
	if ( ocb & cbmask ) {
 | 
						if ( ocb & cbmask ) {
 | 
				
			||||||
	  rhs._odata[so+o+b]=buffer[bo++];
 | 
						  table.push_back(std::pair<int,int> (so+o+b,bo++));
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					PARALLEL_FOR_LOOP     
 | 
				
			||||||
 | 
					     for(int i=0;i<table.size();i++){
 | 
				
			||||||
 | 
					       //       std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
 | 
				
			||||||
 | 
					       rhs._odata[table[i].first]=buffer[table[i].second];
 | 
				
			||||||
 | 
					     }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -222,7 +227,11 @@ PARALLEL_NESTED_LOOP2
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  } else { 
 | 
					  } else { 
 | 
				
			||||||
    assert(0); // think this is buggy FIXME
 | 
					
 | 
				
			||||||
 | 
					    // Case of SIMD split AND checker dim cannot currently be hit, except in 
 | 
				
			||||||
 | 
					    // Test_cshift_red_black code.
 | 
				
			||||||
 | 
					    //    std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
 | 
				
			||||||
 | 
					    std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					    for(int n=0;n<e1;n++){
 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					      for(int b=0;b<e2;b++){
 | 
				
			||||||
	int o      = n*rhs._grid->_slice_stride[dimension];
 | 
						int o      = n*rhs._grid->_slice_stride[dimension];
 | 
				
			||||||
@@ -338,8 +347,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
				
			|||||||
  // Map to always positive shift modulo global full dimension.
 | 
					  // Map to always positive shift modulo global full dimension.
 | 
				
			||||||
  shift = (shift+fd)%fd;
 | 
					  shift = (shift+fd)%fd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
 | 
					 | 
				
			||||||
  // the permute type
 | 
					  // the permute type
 | 
				
			||||||
 | 
					  ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
 | 
				
			||||||
  int permute_dim =grid->PermuteDim(dimension);
 | 
					  int permute_dim =grid->PermuteDim(dimension);
 | 
				
			||||||
  int permute_type=grid->PermuteType(dimension);
 | 
					  int permute_type=grid->PermuteType(dimension);
 | 
				
			||||||
  int permute_type_dist;
 | 
					  int permute_type_dist;
 | 
				
			||||||
@@ -348,7 +357,6 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    int o   = 0;
 | 
					    int o   = 0;
 | 
				
			||||||
    int bo  = x * grid->_ostride[dimension];
 | 
					    int bo  = x * grid->_ostride[dimension];
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    int cb= (cbmask==0x2)? Odd : Even;
 | 
					    int cb= (cbmask==0x2)? Odd : Even;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 | 
					    int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 | 
				
			||||||
@@ -361,9 +369,23 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
				
			|||||||
    // wrap is whether sshift > rd.
 | 
					    // wrap is whether sshift > rd.
 | 
				
			||||||
    //  num is sshift mod rd.
 | 
					    //  num is sshift mod rd.
 | 
				
			||||||
    // 
 | 
					    // 
 | 
				
			||||||
 | 
					    //  shift 7
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    //  XoXo YcYc 
 | 
				
			||||||
 | 
					    //  oXoX cYcY
 | 
				
			||||||
 | 
					    //  XoXo YcYc
 | 
				
			||||||
 | 
					    //  oXoX cYcY
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    //  sshift -- 
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    //  XX YY ; 3
 | 
				
			||||||
 | 
					    //  XX YY ; 0
 | 
				
			||||||
 | 
					    //  XX YY ; 3
 | 
				
			||||||
 | 
					    //  XX YY ; 0
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
    int permute_slice=0;
 | 
					    int permute_slice=0;
 | 
				
			||||||
    if(permute_dim){
 | 
					    if(permute_dim){
 | 
				
			||||||
      int wrap = sshift/rd;
 | 
					      int wrap = sshift/rd; wrap=wrap % ly;
 | 
				
			||||||
      int  num = sshift%rd;
 | 
					      int  num = sshift%rd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if ( x< rd-num ) permute_slice=wrap;
 | 
					      if ( x< rd-num ) permute_slice=wrap;
 | 
				
			||||||
@@ -375,7 +397,6 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 | 
				
			|||||||
      } else {
 | 
					      } else {
 | 
				
			||||||
	permute_type_dist = permute_type;
 | 
						permute_type_dist = permute_type;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
 | 
					    if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -74,7 +74,6 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &r
 | 
				
			|||||||
  sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
 | 
					  sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //  std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
 | 
					  //  std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
 | 
				
			||||||
 | 
					 | 
				
			||||||
  if ( sshift[0] == sshift[1] ) {
 | 
					  if ( sshift[0] == sshift[1] ) {
 | 
				
			||||||
    //    std::cout << "Single pass Cshift_comms" <<std::endl;
 | 
					    //    std::cout << "Single pass Cshift_comms" <<std::endl;
 | 
				
			||||||
    Cshift_comms(ret,rhs,dimension,shift,0x3);
 | 
					    Cshift_comms(ret,rhs,dimension,shift,0x3);
 | 
				
			||||||
@@ -154,10 +153,14 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
 | 
				
			|||||||
			   (void *)&recv_buf[0],
 | 
								   (void *)&recv_buf[0],
 | 
				
			||||||
			   recv_from_rank,
 | 
								   recv_from_rank,
 | 
				
			||||||
			   bytes);
 | 
								   bytes);
 | 
				
			||||||
 | 
					      grid->Barrier();
 | 
				
			||||||
      //      for(int i=0;i<words;i++){
 | 
					      /*
 | 
				
			||||||
      //	std::cout << "SendRecv ["<<i<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << "  0x" << cbmask<<std::endl;
 | 
					      for(int i=0;i<send_buf.size();i++){
 | 
				
			||||||
      //      }
 | 
						assert(recv_buf.size()==buffer_size);
 | 
				
			||||||
 | 
						assert(send_buf.size()==buffer_size);
 | 
				
			||||||
 | 
						std::cout << "SendRecv_Cshift_comms ["<<i<<" "<< dimension<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << "  0x" << cbmask<<std::endl;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      */
 | 
				
			||||||
      Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
 | 
					      Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@@ -243,7 +246,14 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
 | 
				
			|||||||
			     (void *)&recv_buf_extract[i][0],
 | 
								     (void *)&recv_buf_extract[i][0],
 | 
				
			||||||
			     recv_from_rank,
 | 
								     recv_from_rank,
 | 
				
			||||||
			     bytes);
 | 
								     bytes);
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						for(int w=0;w<recv_buf_extract[i].size();w++){
 | 
				
			||||||
 | 
						  assert(recv_buf_extract[i].size()==buffer_size);
 | 
				
			||||||
 | 
						  assert(send_buf_extract[i].size()==buffer_size);
 | 
				
			||||||
 | 
						  std::cout << "SendRecv_Cshift_comms ["<<w<<" "<< dimension<<"] recv "<<recv_buf_extract[i][w]<<" send " << send_buf_extract[nbr_lane][w]  << cbmask<<std::endl;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						*/	
 | 
				
			||||||
 | 
						grid->Barrier();
 | 
				
			||||||
	rpointers[i] = &recv_buf_extract[i][0];
 | 
						rpointers[i] = &recv_buf_extract[i][0];
 | 
				
			||||||
      } else { 
 | 
					      } else { 
 | 
				
			||||||
	rpointers[i] = &send_buf_extract[nbr_lane][0];
 | 
						rpointers[i] = &send_buf_extract[nbr_lane][0];
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user