mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Making running on Aurora more debuggable
This commit is contained in:
		@@ -269,7 +269,9 @@ public:
 | 
				
			|||||||
    RealD xscale = 2.0/(hi-lo);
 | 
					    RealD xscale = 2.0/(hi-lo);
 | 
				
			||||||
    RealD mscale = -(hi+lo)/(hi-lo);
 | 
					    RealD mscale = -(hi+lo)/(hi-lo);
 | 
				
			||||||
    Linop.HermOp(T0,y);
 | 
					    Linop.HermOp(T0,y);
 | 
				
			||||||
 | 
					    grid->Barrier();
 | 
				
			||||||
    axpby(T1,xscale,mscale,y,in);
 | 
					    axpby(T1,xscale,mscale,y,in);
 | 
				
			||||||
 | 
					    grid->Barrier();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // sum = .5 c[0] T0 + c[1] T1
 | 
					    // sum = .5 c[0] T0 + c[1] T1
 | 
				
			||||||
    //    out = ()*T0 + Coeffs[1]*T1;
 | 
					    //    out = ()*T0 + Coeffs[1]*T1;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -260,32 +260,39 @@ CartesianCommunicator::~CartesianCommunicator()
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
#ifdef USE_GRID_REDUCTION
 | 
					#ifdef USE_GRID_REDUCTION
 | 
				
			||||||
void CartesianCommunicator::GlobalSum(float &f){
 | 
					void CartesianCommunicator::GlobalSum(float &f){
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("GlobalSumP2P");
 | 
				
			||||||
  CartesianCommunicator::GlobalSumP2P(f);
 | 
					  CartesianCommunicator::GlobalSumP2P(f);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::GlobalSum(double &d)
 | 
					void CartesianCommunicator::GlobalSum(double &d)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("GlobalSumP2P");
 | 
				
			||||||
  CartesianCommunicator::GlobalSumP2P(d);
 | 
					  CartesianCommunicator::GlobalSumP2P(d);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
void CartesianCommunicator::GlobalSum(float &f){
 | 
					void CartesianCommunicator::GlobalSum(float &f){
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("AllReduce");
 | 
				
			||||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
 | 
				
			||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::GlobalSum(double &d)
 | 
					void CartesianCommunicator::GlobalSum(double &d)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("AllReduce");
 | 
				
			||||||
  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
 | 
					  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
 | 
				
			||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
					void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("AllReduce");
 | 
				
			||||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
 | 
				
			||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
					void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("AllReduce");
 | 
				
			||||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
 | 
				
			||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){
 | 
					void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("AllReduceVector");
 | 
				
			||||||
  int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator);
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator);
 | 
				
			||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -794,6 +801,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void CartesianCommunicator::StencilBarrier(void)
 | 
					void CartesianCommunicator::StencilBarrier(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("NodeBarrier");
 | 
				
			||||||
  MPI_Barrier  (ShmComm);
 | 
					  MPI_Barrier  (ShmComm);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
					//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
				
			||||||
@@ -801,11 +809,13 @@ void CartesianCommunicator::StencilBarrier(void)
 | 
				
			|||||||
//}
 | 
					//}
 | 
				
			||||||
void CartesianCommunicator::Barrier(void)
 | 
					void CartesianCommunicator::Barrier(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("GridBarrier");
 | 
				
			||||||
  int ierr = MPI_Barrier(communicator);
 | 
					  int ierr = MPI_Barrier(communicator);
 | 
				
			||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
					void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("Broadcast");
 | 
				
			||||||
  int ierr=MPI_Bcast(data,
 | 
					  int ierr=MPI_Bcast(data,
 | 
				
			||||||
		     bytes,
 | 
							     bytes,
 | 
				
			||||||
		     MPI_BYTE,
 | 
							     MPI_BYTE,
 | 
				
			||||||
@@ -824,6 +834,7 @@ void CartesianCommunicator::BarrierWorld(void){
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
					void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("BroadcastWorld");
 | 
				
			||||||
  int ierr= MPI_Bcast(data,
 | 
					  int ierr= MPI_Bcast(data,
 | 
				
			||||||
		      bytes,
 | 
							      bytes,
 | 
				
			||||||
		      MPI_BYTE,
 | 
							      MPI_BYTE,
 | 
				
			||||||
@@ -846,6 +857,7 @@ void CartesianCommunicator::AllToAll(int dim,void  *in,void *out,uint64_t words,
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes)
 | 
					void CartesianCommunicator::AllToAll(void  *in,void *out,uint64_t words,uint64_t bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  FlightRecorder::StepLog("AllToAll");
 | 
				
			||||||
  // MPI is a pain and uses "int" arguments
 | 
					  // MPI is a pain and uses "int" arguments
 | 
				
			||||||
  // 64*64*64*128*16 == 500Million elements of data.
 | 
					  // 64*64*64*128*16 == 500Million elements of data.
 | 
				
			||||||
  // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
 | 
					  // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -990,7 +990,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SharedMemoryTest();
 | 
					  //  SharedMemoryTest();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
//////////////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////////////
 | 
				
			||||||
// On node barrier
 | 
					// On node barrier
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -396,6 +396,7 @@ public:
 | 
				
			|||||||
					Packets[i].from_rank,Packets[i].do_recv,
 | 
										Packets[i].from_rank,Packets[i].do_recv,
 | 
				
			||||||
					Packets[i].xbytes,Packets[i].rbytes,i);
 | 
										Packets[i].xbytes,Packets[i].rbytes,i);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    FlightRecorder::StepLog("Communicate begin has finished");
 | 
				
			||||||
    // Get comms started then run checksums
 | 
					    // Get comms started then run checksums
 | 
				
			||||||
    // Having this PRIOR to the dslash seems to make Sunspot work... (!)
 | 
					    // Having this PRIOR to the dslash seems to make Sunspot work... (!)
 | 
				
			||||||
    for(int i=0;i<Packets.size();i++){
 | 
					    for(int i=0;i<Packets.size();i++){
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -638,12 +638,11 @@ void Grid_debug_handler_init(void)
 | 
				
			|||||||
  sa.sa_flags    = SA_SIGINFO;
 | 
					  sa.sa_flags    = SA_SIGINFO;
 | 
				
			||||||
  //  sigaction(SIGSEGV,&sa,NULL);
 | 
					  //  sigaction(SIGSEGV,&sa,NULL);
 | 
				
			||||||
  sigaction(SIGTRAP,&sa,NULL);
 | 
					  sigaction(SIGTRAP,&sa,NULL);
 | 
				
			||||||
  sigaction(SIGBUS,&sa,NULL);
 | 
					  //  sigaction(SIGBUS,&sa,NULL);
 | 
				
			||||||
  //  sigaction(SIGUSR2,&sa,NULL);
 | 
					  //  sigaction(SIGUSR2,&sa,NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
 | 
					  //  feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
 | 
				
			||||||
 | 
					  //  sigaction(SIGFPE,&sa,NULL);
 | 
				
			||||||
  sigaction(SIGFPE,&sa,NULL);
 | 
					 | 
				
			||||||
  sigaction(SIGKILL,&sa,NULL);
 | 
					  sigaction(SIGKILL,&sa,NULL);
 | 
				
			||||||
  sigaction(SIGILL,&sa,NULL);
 | 
					  sigaction(SIGILL,&sa,NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -74,7 +74,7 @@ template <class T> void writeFile(T& in, std::string const fname){
 | 
				
			|||||||
  Grid::emptyUserRecord record;
 | 
					  Grid::emptyUserRecord record;
 | 
				
			||||||
  Grid::ScidacWriter WR(in.Grid()->IsBoss());
 | 
					  Grid::ScidacWriter WR(in.Grid()->IsBoss());
 | 
				
			||||||
  WR.open(fname);
 | 
					  WR.open(fname);
 | 
				
			||||||
  WR.writeScidacFieldRecord(in,record,0);
 | 
					  WR.writeScidacFieldRecord(in,record,0); // Lexico
 | 
				
			||||||
  WR.close();
 | 
					  WR.close();
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
  // What is the appropriate way to throw error?
 | 
					  // What is the appropriate way to throw error?
 | 
				
			||||||
@@ -190,8 +190,8 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    std::string ufile = file_pre + "U_" + std::to_string(tau) + "_" + file_post;
 | 
					    std::string ufile = file_pre + "U_" + std::to_string(tau) + "_" + file_post;
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      PeriodicGimplR::GaugeField Ucopy = U;
 | 
					      //      PeriodicGimplR::GaugeField Ucopy = U;
 | 
				
			||||||
      NerscIO::writeConfiguration(Ucopy,ufile);
 | 
					      //      NerscIO::writeConfiguration(Ucopy,ufile);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    RealD E = real(sum(R))/ RealD(U.Grid()->gSites());
 | 
					    RealD E = real(sum(R))/ RealD(U.Grid()->gSites());
 | 
				
			||||||
@@ -206,7 +206,7 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  
 | 
					  
 | 
				
			||||||
  int t=WFPar.maxTau;
 | 
					  int t=WFPar.maxTau;
 | 
				
			||||||
  WF.smear(Uflow, Umu);
 | 
					  WF.smear(Uflow, Umu);
 | 
				
			||||||
  NerscIO::writeConfiguration(Uflow,filesmr);
 | 
					  //  NerscIO::writeConfiguration(Uflow,filesmr);
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
 | 
					  RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
project(GridViewer)
 | 
					project(GridViewer)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
list(APPEND CMAKE_PREFIX_PATH "/Users/peterboyle/QCD/vtk/VTK-9.4.2-install/")
 | 
					list(APPEND CMAKE_PREFIX_PATH "/home/paboyle/Visualisation/install/")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
find_package(VTK COMPONENTS 
 | 
					find_package(VTK COMPONENTS 
 | 
				
			||||||
  CommonColor
 | 
					  CommonColor
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,10 +1,17 @@
 | 
				
			|||||||
libs=`grid-config --libs`
 | 
					export grid_config=/home/paboyle/GPT/install/bin/grid-config
 | 
				
			||||||
ldflags=`grid-config --ldflags`
 | 
					libs=`$grid_config --libs`
 | 
				
			||||||
cxxflags=`grid-config --cxxflags`
 | 
					ldflags=`$grid_config --ldflags`
 | 
				
			||||||
cxx=`grid-config --cxx`
 | 
					cxxflags=`$grid_config --cxxflags`
 | 
				
			||||||
cc=clang
 | 
					cxx=`$grid_config --cxx`
 | 
				
			||||||
 | 
					cc=icx
 | 
				
			||||||
 | 
					
 | 
				
			||||||
mkdir build
 | 
					mkdir build
 | 
				
			||||||
cd build
 | 
					cd build
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_C_COMPILER=$cc -DCMAKE_CXX_COMPILER=$cxx -DCMAKE_CXX_FLAGS=$cxxflags 
 | 
					echo CC $cc
 | 
				
			||||||
 | 
					echo CXX $cxx
 | 
				
			||||||
 | 
					echo CXXFLAGS $cxxflags
 | 
				
			||||||
 | 
					echo LDFLAGS  $ldflags
 | 
				
			||||||
 | 
					echo LIBS  $libs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_C_COMPILER=$cc -DCMAKE_CXX_COMPILER="$cxx" -DCMAKE_CXX_FLAGS="$cxxflags "
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user