mirror of
https://github.com/paboyle/Grid.git
synced 2026-05-05 09:44:33 +01:00
Making running on Aurora more debuggable
This commit is contained in:
@@ -269,7 +269,9 @@ public:
|
||||
RealD xscale = 2.0/(hi-lo);
|
||||
RealD mscale = -(hi+lo)/(hi-lo);
|
||||
Linop.HermOp(T0,y);
|
||||
grid->Barrier();
|
||||
axpby(T1,xscale,mscale,y,in);
|
||||
grid->Barrier();
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
// out = ()*T0 + Coeffs[1]*T1;
|
||||
|
||||
@@ -260,32 +260,39 @@ CartesianCommunicator::~CartesianCommunicator()
|
||||
}
|
||||
#ifdef USE_GRID_REDUCTION
|
||||
// GlobalSum(float) -- variant compiled when USE_GRID_REDUCTION is defined.
// Records a "GlobalSumP2P" step through FlightRecorder::StepLog, then hands
// the reduction of f to GlobalSumP2P.
// NOTE(review): GlobalSumP2P is defined outside this excerpt; presumably it
// leaves the global sum in f, like the MPI_Allreduce variant -- confirm.
void CartesianCommunicator::GlobalSum(float &f){
|
||||
FlightRecorder::StepLog("GlobalSumP2P");
|
||||
CartesianCommunicator::GlobalSumP2P(f);
|
||||
}
|
||||
// GlobalSum(double) -- variant compiled when USE_GRID_REDUCTION is defined.
// Records a "GlobalSumP2P" step through FlightRecorder::StepLog, then hands
// the reduction of d to GlobalSumP2P.
// NOTE(review): GlobalSumP2P is defined outside this excerpt; presumably it
// leaves the global sum in d -- confirm.
void CartesianCommunicator::GlobalSum(double &d)
|
||||
{
|
||||
FlightRecorder::StepLog("GlobalSumP2P");
|
||||
CartesianCommunicator::GlobalSumP2P(d);
|
||||
}
|
||||
#else
|
||||
// GlobalSum(float) -- variant compiled when USE_GRID_REDUCTION is not defined.
// Records an "AllReduce" step through FlightRecorder::StepLog, then sums the
// single float f over `communicator` with MPI_Allreduce.
// f is both input and output: on return it holds the reduced value.
void CartesianCommunicator::GlobalSum(float &f){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
// MPI_IN_PLACE: the local contribution is read from &f and the result is
// written back to &f.
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||
// Treats any non-zero return code as fatal (assumes MPI_SUCCESS == 0).
assert(ierr==0);
|
||||
}
|
||||
// GlobalSum(double) -- variant compiled when USE_GRID_REDUCTION is not defined.
// Records an "AllReduce" step through FlightRecorder::StepLog, then sums the
// single double d over `communicator` with MPI_Allreduce.
// d is both input and output: on return it holds the reduced value.
void CartesianCommunicator::GlobalSum(double &d)
|
||||
{
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
// MPI_IN_PLACE: the local contribution is read from &d and the result is
// written back to &d.
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||
// Treats any non-zero return code as fatal (assumes MPI_SUCCESS == 0).
assert(ierr==0);
|
||||
}
|
||||
#endif
|
||||
// GlobalSum(uint32_t): sits after the #endif, so it is compiled regardless of
// USE_GRID_REDUCTION. Records an "AllReduce" step through
// FlightRecorder::StepLog, then sums the single 32-bit unsigned value u over
// `communicator` with MPI_Allreduce. u is overwritten with the result.
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
// MPI_IN_PLACE: u is both the local contribution and the destination.
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||
// Treats any non-zero return code as fatal (assumes MPI_SUCCESS == 0).
assert(ierr==0);
|
||||
}
|
||||
// GlobalSum(uint64_t): records an "AllReduce" step through
// FlightRecorder::StepLog, then sums the single 64-bit unsigned value u over
// `communicator` with MPI_Allreduce. u is overwritten with the result.
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||
FlightRecorder::StepLog("AllReduce");
|
||||
// MPI_IN_PLACE: u is both the local contribution and the destination.
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
// Treats any non-zero return code as fatal (assumes MPI_SUCCESS == 0).
assert(ierr==0);
|
||||
}
|
||||
// GlobalSumVector(uint64_t*, int): element-wise sum of an array over
// `communicator`.
//   u : buffer of N uint64_t values, reduced in place (overwritten with the sums)
//   N : element count passed straight to MPI_Allreduce
// Records an "AllReduceVector" step through FlightRecorder::StepLog before
// the collective is issued.
void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){
|
||||
FlightRecorder::StepLog("AllReduceVector");
|
||||
// MPI_IN_PLACE: u serves as both send and receive buffer.
int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
// Treats any non-zero return code as fatal (assumes MPI_SUCCESS == 0).
assert(ierr==0);
|
||||
}
|
||||
@@ -794,6 +801,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
|
||||
|
||||
// StencilBarrier: records a "NodeBarrier" step through
// FlightRecorder::StepLog, then blocks in MPI_Barrier on ShmComm.
// NOTE(review): ShmComm is declared outside this excerpt; the "NodeBarrier"
// label suggests it spans the ranks of one node -- confirm.
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
FlightRecorder::StepLog("NodeBarrier");
|
||||
// The return code is not checked here, unlike Barrier(), which asserts on it.
MPI_Barrier (ShmComm);
|
||||
}
|
||||
//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
@@ -801,11 +809,13 @@ void CartesianCommunicator::StencilBarrier(void)
|
||||
//}
|
||||
// Barrier: records a "GridBarrier" step through FlightRecorder::StepLog, then
// blocks in MPI_Barrier on `communicator`.
void CartesianCommunicator::Barrier(void)
|
||||
{
|
||||
FlightRecorder::StepLog("GridBarrier");
|
||||
int ierr = MPI_Barrier(communicator);
|
||||
// Treats any non-zero return code as fatal (assumes MPI_SUCCESS == 0).
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("Broadcast");
|
||||
int ierr=MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
@@ -824,6 +834,7 @@ void CartesianCommunicator::BarrierWorld(void){
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("BroadcastWorld");
|
||||
int ierr= MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
@@ -846,6 +857,7 @@ void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,
|
||||
}
|
||||
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
FlightRecorder::StepLog("AllToAll");
|
||||
// MPI is a pain and uses "int" arguments
|
||||
// 64*64*64*128*16 == 500Million elements of data.
|
||||
// With 24*4-byte elements that is ~50 x 10^9 bytes, far beyond the ~2 x 10^9 limit of a signed 32-bit int (a Y2K-style overflow bug).
|
||||
|
||||
@@ -990,7 +990,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
}
|
||||
#endif
|
||||
|
||||
SharedMemoryTest();
|
||||
// SharedMemoryTest();
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// On node barrier
|
||||
|
||||
@@ -396,6 +396,7 @@ public:
|
||||
Packets[i].from_rank,Packets[i].do_recv,
|
||||
Packets[i].xbytes,Packets[i].rbytes,i);
|
||||
}
|
||||
FlightRecorder::StepLog("Communicate begin has finished");
|
||||
// Get comms started then run checksums
|
||||
// Having this PRIOR to the dslash seems to make Sunspot work... (!)
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
|
||||
+3
-4
@@ -638,12 +638,11 @@ void Grid_debug_handler_init(void)
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
// sigaction(SIGSEGV,&sa,NULL);
|
||||
sigaction(SIGTRAP,&sa,NULL);
|
||||
sigaction(SIGBUS,&sa,NULL);
|
||||
// sigaction(SIGBUS,&sa,NULL);
|
||||
// sigaction(SIGUSR2,&sa,NULL);
|
||||
|
||||
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||
|
||||
sigaction(SIGFPE,&sa,NULL);
|
||||
// feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||
// sigaction(SIGFPE,&sa,NULL);
|
||||
sigaction(SIGKILL,&sa,NULL);
|
||||
sigaction(SIGILL,&sa,NULL);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user