mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 20:57:06 +01:00
Almost working on Aurora
This commit is contained in:
@ -325,12 +325,12 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
|
||||
// Start comms // Gather intranode and extra node differentiated??
|
||||
/////////////////////////////
|
||||
{
|
||||
std::cout << " WilsonFermion5D gather " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D gather " <<std::endl;
|
||||
GRID_TRACE("Gather");
|
||||
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
|
||||
}
|
||||
|
||||
std::cout << " WilsonFermion5D Communicate Begin " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Communicate Begin " <<std::endl;
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
auto id=traceStart("Communicate overlapped");
|
||||
st.CommunicateBegin(requests);
|
||||
@ -339,7 +339,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
|
||||
// Overlap with comms
|
||||
/////////////////////////////
|
||||
{
|
||||
std::cout << " WilsonFermion5D Comms merge " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Comms merge " <<std::endl;
|
||||
GRID_TRACE("MergeSHM");
|
||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||
}
|
||||
@ -347,7 +347,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
|
||||
/////////////////////////////
|
||||
// do the compute interior
|
||||
/////////////////////////////
|
||||
std::cout << " WilsonFermion5D Interior " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Interior " <<std::endl;
|
||||
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDagInterior");
|
||||
@ -360,7 +360,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
|
||||
/////////////////////////////
|
||||
// Complete comms
|
||||
/////////////////////////////
|
||||
std::cout << " WilsonFermion5D Comms Complete " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Comms Complete " <<std::endl;
|
||||
st.CommunicateComplete(requests);
|
||||
traceStop(id);
|
||||
|
||||
@ -368,13 +368,13 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
|
||||
// do the compute exterior
|
||||
/////////////////////////////
|
||||
{
|
||||
std::cout << " WilsonFermion5D Comms Merge " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Comms Merge " <<std::endl;
|
||||
GRID_TRACE("Merge");
|
||||
st.CommsMerge(compressor);
|
||||
}
|
||||
|
||||
|
||||
std::cout << " WilsonFermion5D Exterior " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Exterior " <<std::endl;
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDagExterior");
|
||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||
@ -382,7 +382,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
|
||||
GRID_TRACE("DhopExterior");
|
||||
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||
}
|
||||
std::cout << " WilsonFermion5D Done " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Done " <<std::endl;
|
||||
}
|
||||
|
||||
|
||||
@ -397,13 +397,13 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
||||
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
|
||||
std::cout << " WilsonFermion5D Halo exch " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Halo exch " <<std::endl;
|
||||
{
|
||||
GRID_TRACE("HaloExchange");
|
||||
st.HaloExchangeOpt(in,compressor);
|
||||
}
|
||||
|
||||
std::cout << " WilsonFermion5D Dhop " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Dhop " <<std::endl;
|
||||
int Opt = WilsonKernelsStatic::Opt;
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDag");
|
||||
@ -412,7 +412,7 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
||||
GRID_TRACE("Dhop");
|
||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||
}
|
||||
std::cout << " WilsonFermion5D Done " <<std::endl;
|
||||
// std::cout << " WilsonFermion5D Done " <<std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
@ -365,8 +365,8 @@ public:
|
||||
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
|
||||
{
|
||||
// All GPU kernel tasks must complete
|
||||
// accelerator_barrier(); // All kernels should ALREADY be complete
|
||||
// _grid->StencilBarrier(); // Everyone is here, so no one is running slow and still using the receive buffer
|
||||
accelerator_barrier(); // All kernels should ALREADY be complete
|
||||
_grid->StencilBarrier(); // Everyone is here, so no one is running slow and still using the receive buffer
|
||||
// But the HaloGather had a barrier too.
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
_grid->StencilSendToRecvFromBegin(MpiReqs,
|
||||
@ -390,8 +390,8 @@ public:
|
||||
if ( this->partialDirichlet ) DslashLogPartial();
|
||||
else if ( this->fullDirichlet ) DslashLogDirichlet();
|
||||
else DslashLogFull();
|
||||
// acceleratorCopySynchronise() is in the StencilSendToRecvFromComplete
|
||||
// accelerator_barrier();
|
||||
acceleratorCopySynchronise();// is in the StencilSendToRecvFromComplete
|
||||
accelerator_barrier();
|
||||
_grid->StencilBarrier();
|
||||
// run any checksums
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
@ -473,7 +473,7 @@ public:
|
||||
template<class compressor>
|
||||
void HaloGather(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
// accelerator_barrier();
|
||||
accelerator_barrier();
|
||||
_grid->StencilBarrier();// Sync shared memory on a single node
|
||||
|
||||
assert(source.Grid()==_grid);
|
||||
@ -487,6 +487,7 @@ public:
|
||||
HaloGatherDir(source,compress,point,face_idx);
|
||||
}
|
||||
accelerator_barrier(); // All my local gathers are complete
|
||||
_grid->StencilBarrier();// Sync shared memory on a single node
|
||||
face_table_computed=1;
|
||||
assert(u_comm_offset==_unified_buffer_size);
|
||||
}
|
||||
@ -653,7 +654,9 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
||||
surface_list.resize(surface_list_size);
|
||||
std::vector<int> surface_list_host(surface_list_size);
|
||||
int32_t ss=0;
|
||||
for(int site = 0 ;site< vol4;site++){
|
||||
int local = 1;
|
||||
@ -665,12 +668,12 @@ public:
|
||||
if(local == 0) {
|
||||
for(int s=0;s<Ls;s++){
|
||||
int idx=site*Ls+s;
|
||||
acceleratorPut(surface_list[ss],idx);
|
||||
surface_list_host[ss]= idx;
|
||||
ss++;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
||||
acceleratorCopyToDevice(&surface_list_host[0],&surface_list[0],surface_list_size*sizeof(int));
|
||||
}
|
||||
/// Introduce a block structure and switch off comms on boundaries
|
||||
void DirichletBlock(const Coordinate &dirichlet_block)
|
||||
|
@ -549,8 +549,31 @@ void GridLogLayout() {
|
||||
|
||||
void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
|
||||
// Diagnostic signal handler that reports and then returns to the interrupted code.
//
// Writes to stderr: the host name (from the externally defined `hostname`),
// the signal number, the address and the code carried in `si`, and - on
// Linux/x86-64 only - the instruction pointer taken from the ucontext in `ptr`.
// It then emits a backtrace via BACKTRACEFP and flushes stdout and stderr.
//
// Parameters (SA_SIGINFO-style signature):
//   sig - signal number; unused here, si->si_signo is printed instead.
//   si  - siginfo_t for the delivered signal; must be non-null (dereferenced).
//   ptr - opaque context pointer; reinterpreted as ucontext_t* on Linux/x86-64.
//
// NOTE(review): fprintf/fflush are not on the POSIX async-signal-safe list;
// presumably acceptable for a debug "ping" handler - confirm.
void Grid_usr_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
{
|
||||
fprintf(stderr,"Signal handler on host %s\n",hostname);
|
||||
fprintf(stderr,"Caught signal %d\n",si->si_signo);
|
||||
fprintf(stderr," mem address %llx\n",(unsigned long long)si->si_addr);
|
||||
fprintf(stderr," code %d\n",si->si_code);
|
||||
// Linux on 64-bit x86 only: pull the faulting/interrupted instruction pointer (rip)
// out of the machine context.
|
||||
#ifdef __linux__
|
||||
#ifdef __x86_64__
|
||||
ucontext_t * uc= (ucontext_t *)ptr;
|
||||
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
|
||||
fprintf(stderr," instruction %llx\n",(unsigned long long)sc->rip);
|
||||
#endif // __x86_64__
|
||||
#endif // __linux__
|
||||
// Flush the header lines before the backtrace is emitted.
fflush(stderr);
|
||||
BACKTRACEFP(stderr);
|
||||
fprintf(stderr,"Called backtrace\n");
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
// Plain return (no exit/abort): control goes back to the interrupted code.
return;
|
||||
}
|
||||
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
{
|
||||
fprintf(stderr,"Signal handler on host %s\n",hostname);
|
||||
fprintf(stderr,"Caught signal %d\n",si->si_signo);
|
||||
fprintf(stderr," mem address %llx\n",(unsigned long long)si->si_addr);
|
||||
fprintf(stderr," code %d\n",si->si_code);
|
||||
@ -561,7 +584,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
ucontext_t * uc= (ucontext_t *)ptr;
|
||||
struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
|
||||
fprintf(stderr," instruction %llx\n",(unsigned long long)sc->rip);
|
||||
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
|
||||
#define REG(A) fprintf(stderr," %s %lx\n",#A,sc-> A);
|
||||
REG(rdi);
|
||||
REG(rsi);
|
||||
REG(rbp);
|
||||
@ -594,8 +617,8 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
||||
|
||||
void Grid_exit_handler(void)
|
||||
{
|
||||
BACKTRACEFP(stdout);
|
||||
fflush(stdout);
|
||||
// BACKTRACEFP(stdout);
|
||||
// fflush(stdout);
|
||||
}
|
||||
void Grid_debug_handler_init(void)
|
||||
{
|
||||
@ -603,10 +626,10 @@ void Grid_debug_handler_init(void)
|
||||
sigemptyset (&sa.sa_mask);
|
||||
sa.sa_sigaction= Grid_sa_signal_handler;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
sigaction(SIGSEGV,&sa,NULL);
|
||||
// sigaction(SIGSEGV,&sa,NULL);
|
||||
sigaction(SIGTRAP,&sa,NULL);
|
||||
sigaction(SIGBUS,&sa,NULL);
|
||||
sigaction(SIGUSR2,&sa,NULL);
|
||||
// sigaction(SIGUSR2,&sa,NULL);
|
||||
|
||||
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||
|
||||
@ -614,7 +637,14 @@ void Grid_debug_handler_init(void)
|
||||
sigaction(SIGKILL,&sa,NULL);
|
||||
sigaction(SIGILL,&sa,NULL);
|
||||
|
||||
atexit(Grid_exit_handler);
|
||||
// Non-terminating handler (Grid_usr_signal_handler), installed on SIGHUP
|
||||
struct sigaction sa_ping;
|
||||
sigemptyset (&sa_ping.sa_mask);
|
||||
sa_ping.sa_sigaction= Grid_usr_signal_handler;
|
||||
sa_ping.sa_flags = SA_SIGINFO;
|
||||
sigaction(SIGHUP,&sa_ping,NULL);
|
||||
|
||||
// atexit(Grid_exit_handler);
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
Reference in New Issue
Block a user