Fix deadlock in signal handler for segmentation fault recovery (includes cleanup)
Previously, safe/hyperstrict interpretation could end up in a deadlock after returning from the signal handler. This could be triggered with:
for i in $(seq 1 1000); do echo $i; ./test/GraphTest >/dev/null 2>/dev/null; done
The important commit here is 6c776c0b. The ret
value which was allocated on the heap (though unnecessarily so) was never freed, causing free()
to use signal-unsafe functionality and end up in a deadlock.
The present implementation is still not exactly conforming to man signal-safety
, which says:
If a signal handler interrupts the execution of an unsafe function, and the handler terminates via a call to
longjmp(3)
orsiglongjmp(3)
and the program subsequently calls an unsafe function, then the behavior of the program is undefined.
It seems to work fine because for now the signal handler is not called during unsafe functions(?), or because 'undefined' means 'expected' for now. I'm hesitant to implement a possibly better scheme without failing test, but for reference, here are two/three other possible solutions I considered (before figuring out the malloc
issue):
-
Using a separate thread for the interpreter and catching the
SIGSEGV
in the main thread. This is not possible, because you can only block asynchronous signals in another thread. Synchronous signals are always sent to the target thread. -
Using
getcontext
where we now havesigsetjmp
and usingsetcontext
to return from the signal handler. It is unclear to me whether this is 'allowed'.WIP diff
diff --git a/src/interpret.c b/src/interpret.c index 06a6d7d..1d62c61 100644 --- a/src/interpret.c +++ b/src/interpret.c @@ -249,12 +249,19 @@ static BC_WORD *asp, *bsp, *csp, *hp = NULL; #include <setjmp.h> #ifdef POSIX +# ifndef __USE_GNU +# define __USE_GNU +# include <ucontext.h> +# undef __USE_GNU +# else +# include <ucontext.h> +# endif # include <signal.h> #endif #ifdef LINK_CLEAN_RUNTIME struct segfault_restore_points { - jmp_buf restore_point; + ucontext_t restore_point; BC_WORD *host_a_ptr; struct segfault_restore_points *prev; }; @@ -264,6 +271,7 @@ static struct segfault_restore_points *segfault_restore_points=NULL; #ifdef POSIX # ifdef LINK_CLEAN_RUNTIME static struct sigaction old_segv_handler; +static int segv_detected=0; # endif static void handle_segv(int sig, siginfo_t *info, void *context) { # ifdef LINK_CLEAN_RUNTIME @@ -280,7 +288,8 @@ static void handle_segv(int sig, siginfo_t *info, void *context) { # endif EPRINTF("Segmentation fault in interpreter\n"); # ifdef LINK_CLEAN_RUNTIME - siglongjmp(segfault_restore_points->restore_point, SIGSEGV); + segv_detected=1; + setcontext(&segfault_restore_points->restore_point); # else exit(1); # endif @@ -432,7 +441,9 @@ int interpret( new->host_a_ptr=ie->host->host_a_ptr; segfault_restore_points=new; # ifdef POSIX - if (sigsetjmp(new->restore_point, 1) != 0) { + if (getcontext(&new->restore_point)) + perror("getcontext"); + if (segv_detected) { # else if (setjmp(new->restore_point) != 0) { # endif
-
Overwrite the
uc_mcontext
field of the third argument to the signal handler to update it with a context fromgetcontext
. This is similar to the previous approach, but has the benefit that not the entireucontext_t
has to be stored to create a restore point (this optimization is not reflected in the WIP diff below however).WIP diff
diff --git a/src/interpret.c b/src/interpret.c index df119c2..0c8cc40 100644 --- a/src/interpret.c +++ b/src/interpret.c @@ -249,12 +249,19 @@ static BC_WORD *asp, *bsp, *csp, *hp = NULL; #include <setjmp.h> #ifdef POSIX +# ifdef __USE_GNU +# include <ucontext.h> +# else +# define __USE_GNU +# include <ucontext.h> +# undef __USE_GNU +# endif # include <signal.h> #endif #ifdef LINK_CLEAN_RUNTIME struct segfault_restore_points { - jmp_buf restore_point; + ucontext_t restore_point; BC_WORD *host_a_ptr; struct segfault_restore_points *prev; }; @@ -265,12 +272,12 @@ static struct segfault_restore_points *segfault_restore_points=NULL; # ifdef LINK_CLEAN_RUNTIME static struct sigaction old_segv_handler; # endif -static void handle_segv(int sig, siginfo_t *info, void *context) { +static void handle_segv(int sig, siginfo_t *info, void *vcontext) { # ifdef LINK_CLEAN_RUNTIME if (segfault_restore_points==NULL) { if (old_segv_handler.sa_handler!=SIG_DFL && old_segv_handler.sa_handler!=SIG_IGN) { if (old_segv_handler.sa_flags & SA_SIGINFO) - old_segv_handler.sa_sigaction(sig,info,context); + old_segv_handler.sa_sigaction(sig,info,vcontext); else old_segv_handler.sa_handler(sig); } @@ -278,9 +285,40 @@ static void handle_segv(int sig, siginfo_t *info, void *context) { } interpret_error=&e__ABC_PInterpreter__dDV__StackOverflow; # endif - EPRINTF("Segmentation fault in interpreter\n"); + //EPRINTF("Segmentation fault in interpreter\n"); # ifdef LINK_CLEAN_RUNTIME - siglongjmp(segfault_restore_points->restore_point, SIGSEGV); + ucontext_t *uc=(ucontext_t*)vcontext; + mcontext_t *mc=&uc->uc_mcontext; + mcontext_t *restore_mc=&segfault_restore_points->restore_point.uc_mcontext; + mc->gregs[REG_R8] =restore_mc->gregs[REG_R8]; + mc->gregs[REG_R9] =restore_mc->gregs[REG_R9]; + mc->gregs[REG_R10] =restore_mc->gregs[REG_R10]; + mc->gregs[REG_R11] =restore_mc->gregs[REG_R11]; + mc->gregs[REG_R12] =restore_mc->gregs[REG_R12]; + mc->gregs[REG_R13] =restore_mc->gregs[REG_R13]; + mc->gregs[REG_R14] =restore_mc->gregs[REG_R14]; + mc->gregs[REG_R15] =restore_mc->gregs[REG_R15]; + mc->gregs[REG_RDI] =restore_mc->gregs[REG_RDI]; + mc->gregs[REG_RSI] =restore_mc->gregs[REG_RSI]; + mc->gregs[REG_RBP] =restore_mc->gregs[REG_RBP]; + mc->gregs[REG_RBX] =restore_mc->gregs[REG_RBX]; + mc->gregs[REG_RDX] =restore_mc->gregs[REG_RDX]; + mc->gregs[REG_RAX] =1; // return 1 from getcontext + mc->gregs[REG_RCX] =restore_mc->gregs[REG_RCX]; + mc->gregs[REG_RSP] =restore_mc->gregs[REG_RSP]; + mc->gregs[REG_RIP] =restore_mc->gregs[REG_RIP]; + //mc->gregs[REG_EFL] =restore_mc->gregs[REG_EFL]; + //fprintf(stderr,"flags: %llx\n",restore_mc->gregs[REG_EFL]); + //mc->gregs[REG_CSGSFS] =restore_mc->gregs[REG_CSGSFS]; // not set by getcontext() + //mc->gregs[REG_ERR] =restore_mc->gregs[REG_ERR]; + //mc->gregs[REG_TRAPNO] =restore_mc->gregs[REG_TRAPNO]; + //mc->gregs[REG_OLDMASK]=restore_mc->gregs[REG_OLDMASK]; + //mc->gregs[REG_CR2] =restore_mc->gregs[REG_CR2]; + //stack_t ss; + //sigaltstack(NULL,&ss); + //ss.ss_flags&=~SS_ONSTACK; + //sigaltstack(&ss,NULL); + //uc->uc_stack.ss_flags&=~SS_ONSTACK; # else exit(1); # endif @@ -432,7 +470,7 @@ int interpret( new->host_a_ptr=ie->host->host_a_ptr; segfault_restore_points=new; # ifdef POSIX - if (sigsetjmp(new->restore_point, 1) != 0) { + if (getcontext(&new->restore_point) != 0) { # else if (setjmp(new->restore_point) != 0) { # endif