Skip to content

Fix deadlock in signal handler for segmentation fault recovery (includes cleanup)

Clean Importer requested to merge fix-segfault-recovery into master

Previously, safe/hyperstrict interpretation could end up in a deadlock after returning from the signal handler. This could be triggered with:

for i in $(seq 1 1000); do echo $i; ./test/GraphTest >/dev/null 2>/dev/null; done

The important commit here is 6c776c0b. The ret value which was allocated on the heap (though unnecessarily so) was never freed, causing free() to use signal-unsafe functionality and end up in a deadlock.

The present implementation is still not exactly conforming to man signal-safety, which says:

If a signal handler interrupts the execution of an unsafe function, and the handler terminates via a call to longjmp(3) or siglongjmp(3) and the program subsequently calls an unsafe function, then the behavior of the program is undefined.

It seems to work fine because for now the signal handler is not called during unsafe functions(?), or because 'undefined' means 'expected' for now. I'm hesitant to implement a possibly better scheme without failing test, but for reference, here are two/three other possible solutions I considered (before figuring out the malloc issue):

  • Using a separate thread for the interpreter and catching the SIGSEGV in the main thread. This is not possible, because you can only block asynchronous signals in another thread. Synchronous signals are always sent to the target thread.

  • Using getcontext where we now have sigsetjmp and using setcontext to return from the signal handler. It is unclear to me whether this is 'allowed'.

    WIP diff
    diff --git a/src/interpret.c b/src/interpret.c
    index 06a6d7d..1d62c61 100644
    --- a/src/interpret.c
    +++ b/src/interpret.c
    @@ -249,12 +249,19 @@ static BC_WORD *asp, *bsp, *csp, *hp = NULL;
     
     #include <setjmp.h>
     #ifdef POSIX
    +# ifndef __USE_GNU
    +#  define __USE_GNU
    +#  include <ucontext.h>
    +#  undef __USE_GNU
    +# else
    +#  include <ucontext.h>
    +# endif
     # include <signal.h>
     #endif
     
     #ifdef LINK_CLEAN_RUNTIME
     struct segfault_restore_points {
    -	jmp_buf restore_point;
    +	ucontext_t restore_point;
     	BC_WORD *host_a_ptr;
     	struct segfault_restore_points *prev;
     };
    @@ -264,6 +271,7 @@ static struct segfault_restore_points *segfault_restore_points=NULL;
     #ifdef POSIX
     # ifdef LINK_CLEAN_RUNTIME
     static struct sigaction old_segv_handler;
    +static int segv_detected=0;
     # endif
     static void handle_segv(int sig, siginfo_t *info, void *context) {
     # ifdef LINK_CLEAN_RUNTIME
    @@ -280,7 +288,8 @@ static void handle_segv(int sig, siginfo_t *info, void *context) {
     # endif
     	EPRINTF("Segmentation fault in interpreter\n");
     # ifdef LINK_CLEAN_RUNTIME
    -	siglongjmp(segfault_restore_points->restore_point, SIGSEGV);
    +	segv_detected=1;
    +	setcontext(&segfault_restore_points->restore_point);
     # else
     	exit(1);
     # endif
    @@ -432,7 +441,9 @@ int interpret(
     		new->host_a_ptr=ie->host->host_a_ptr;
     		segfault_restore_points=new;
     # ifdef POSIX
    -		if (sigsetjmp(new->restore_point, 1) != 0) {
    +		if (getcontext(&new->restore_point))
    +			perror("getcontext");
    +		if (segv_detected) {
     # else
     		if (setjmp(new->restore_point) != 0) {
     # endif
  • Overwrite the uc_mcontext field of the third argument to the signal handler to update it with a context from getcontext. This is similar to the previous approach, but has the benefit that not the entire ucontext_t has to be stored to create a restore point (this optimization is not reflected in the WIP diff below however).

    WIP diff
    diff --git a/src/interpret.c b/src/interpret.c
    index df119c2..0c8cc40 100644
    --- a/src/interpret.c
    +++ b/src/interpret.c
    @@ -249,12 +249,19 @@ static BC_WORD *asp, *bsp, *csp, *hp = NULL;
     
     #include <setjmp.h>
     #ifdef POSIX
    +# ifdef __USE_GNU
    +#  include <ucontext.h>
    +# else
    +#  define __USE_GNU
    +#  include <ucontext.h>
    +#  undef __USE_GNU
    +# endif
     # include <signal.h>
     #endif
     
     #ifdef LINK_CLEAN_RUNTIME
     struct segfault_restore_points {
    -	jmp_buf restore_point;
    +	ucontext_t restore_point;
     	BC_WORD *host_a_ptr;
     	struct segfault_restore_points *prev;
     };
    @@ -265,12 +272,12 @@ static struct segfault_restore_points *segfault_restore_points=NULL;
     # ifdef LINK_CLEAN_RUNTIME
     static struct sigaction old_segv_handler;
     # endif
    -static void handle_segv(int sig, siginfo_t *info, void *context) {
    +static void handle_segv(int sig, siginfo_t *info, void *vcontext) {
     # ifdef LINK_CLEAN_RUNTIME
     	if (segfault_restore_points==NULL) {
     		if (old_segv_handler.sa_handler!=SIG_DFL && old_segv_handler.sa_handler!=SIG_IGN) {
     			if (old_segv_handler.sa_flags & SA_SIGINFO)
    -				old_segv_handler.sa_sigaction(sig,info,context);
    +				old_segv_handler.sa_sigaction(sig,info,vcontext);
     			else
     				old_segv_handler.sa_handler(sig);
     		}
    @@ -278,9 +285,40 @@ static void handle_segv(int sig, siginfo_t *info, void *context) {
     	}
     	interpret_error=&e__ABC_PInterpreter__dDV__StackOverflow;
     # endif
    -	EPRINTF("Segmentation fault in interpreter\n");
    +	//EPRINTF("Segmentation fault in interpreter\n");
     # ifdef LINK_CLEAN_RUNTIME
    -	siglongjmp(segfault_restore_points->restore_point, SIGSEGV);
    +	ucontext_t *uc=(ucontext_t*)vcontext;
    +	mcontext_t *mc=&uc->uc_mcontext;
    +	mcontext_t *restore_mc=&segfault_restore_points->restore_point.uc_mcontext;
    +	mc->gregs[REG_R8]     =restore_mc->gregs[REG_R8];
    +	mc->gregs[REG_R9]     =restore_mc->gregs[REG_R9];
    +	mc->gregs[REG_R10]    =restore_mc->gregs[REG_R10];
    +	mc->gregs[REG_R11]    =restore_mc->gregs[REG_R11];
    +	mc->gregs[REG_R12]    =restore_mc->gregs[REG_R12];
    +	mc->gregs[REG_R13]    =restore_mc->gregs[REG_R13];
    +	mc->gregs[REG_R14]    =restore_mc->gregs[REG_R14];
    +	mc->gregs[REG_R15]    =restore_mc->gregs[REG_R15];
    +	mc->gregs[REG_RDI]    =restore_mc->gregs[REG_RDI];
    +	mc->gregs[REG_RSI]    =restore_mc->gregs[REG_RSI];
    +	mc->gregs[REG_RBP]    =restore_mc->gregs[REG_RBP];
    +	mc->gregs[REG_RBX]    =restore_mc->gregs[REG_RBX];
    +	mc->gregs[REG_RDX]    =restore_mc->gregs[REG_RDX];
    +	mc->gregs[REG_RAX]    =1; // return 1 from getcontext
    +	mc->gregs[REG_RCX]    =restore_mc->gregs[REG_RCX];
    +	mc->gregs[REG_RSP]    =restore_mc->gregs[REG_RSP];
    +	mc->gregs[REG_RIP]    =restore_mc->gregs[REG_RIP];
    +	//mc->gregs[REG_EFL]    =restore_mc->gregs[REG_EFL];
    +	//fprintf(stderr,"flags: %llx\n",restore_mc->gregs[REG_EFL]);
    +	//mc->gregs[REG_CSGSFS] =restore_mc->gregs[REG_CSGSFS]; // not set by getcontext()
    +	//mc->gregs[REG_ERR]    =restore_mc->gregs[REG_ERR];
    +	//mc->gregs[REG_TRAPNO] =restore_mc->gregs[REG_TRAPNO];
    +	//mc->gregs[REG_OLDMASK]=restore_mc->gregs[REG_OLDMASK];
    +	//mc->gregs[REG_CR2]    =restore_mc->gregs[REG_CR2];
    +	//stack_t ss;
    +	//sigaltstack(NULL,&ss);
    +	//ss.ss_flags&=~SS_ONSTACK;
    +	//sigaltstack(&ss,NULL);
    +	//uc->uc_stack.ss_flags&=~SS_ONSTACK;
     # else
     	exit(1);
     # endif
    @@ -432,7 +470,7 @@ int interpret(
     		new->host_a_ptr=ie->host->host_a_ptr;
     		segfault_restore_points=new;
     # ifdef POSIX
    -		if (sigsetjmp(new->restore_point, 1) != 0) {
    +		if (getcontext(&new->restore_point) != 0) {
     # else
     		if (setjmp(new->restore_point) != 0) {
     # endif
Edited by Clean Importer

Merge request reports