@@ -314,6 +314,8 @@ int exc_reg_is_write_fault(uintptr_t esr) {
314314#if defined(HAVE_MACH )
315315#include "signals-mach.c"
316316#else
317+ #include <poll.h>
318+ #include <sys/eventfd.h>
317319
318320int jl_lock_stackwalk (void )
319321{
@@ -439,17 +441,13 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
439441 }
440442}
441443
442- #if !defined(JL_DISABLE_LIBUNWIND )
443- static bt_context_t * signal_context ;
444- pthread_mutex_t in_signal_lock ;
445- static pthread_cond_t exit_signal_cond ;
446- static pthread_cond_t signal_caught_cond ;
444+ pthread_mutex_t in_signal_lock ; // shared with jl_delete_thread
445+ static bt_context_t * signal_context ; // protected by in_signal_lock
446+ static int exit_signal_cond = -1 ;
447+ static int signal_caught_cond = -1 ;
447448
448449int jl_thread_suspend_and_get_state (int tid , int timeout , bt_context_t * ctx )
449450{
450- struct timespec ts ;
451- clock_gettime (CLOCK_REALTIME , & ts );
452- ts .tv_sec += timeout ;
453451 pthread_mutex_lock (& in_signal_lock );
454452 jl_ptls_t ptls2 = jl_atomic_load_relaxed (& jl_all_tls_states )[tid ];
455453 jl_task_t * ct2 = ptls2 ? jl_atomic_load_relaxed (& ptls2 -> current_task ) : NULL ;
@@ -458,48 +456,51 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
458456 pthread_mutex_unlock (& in_signal_lock );
459457 return 0 ;
460458 }
461- jl_atomic_store_release (& ptls2 -> signal_request , 1 );
462- pthread_kill (ptls2 -> system_id , SIGUSR2 );
463- // wait for thread to acknowledge
464- int err = pthread_cond_timedwait (& signal_caught_cond , & in_signal_lock , & ts );
465- if (err == ETIMEDOUT ) {
466- sig_atomic_t request = 1 ;
459+ sig_atomic_t request = 0 ;
460+ if (!jl_atomic_cmpswap (& ptls2 -> signal_request , & request , 1 )) {
461+ // something is wrong, or there is already a usr2 in flight elsewhere
462+ pthread_mutex_unlock (& in_signal_lock );
463+ return 0 ;
464+ }
465+ request = 1 ;
466+ int err = pthread_kill (ptls2 -> system_id , SIGUSR2 );
467+ // wait for thread to acknowledge or timeout
468+ struct pollfd event = {signal_caught_cond , POLLIN , 0 };
469+ if (err == 0 ) {
470+ do {
471+ err = poll (& event , 1 , timeout * 1000 );
472+ } while (err == -1 && errno == EINTR );
473+ }
474+ if ((event .revents & POLLIN ) == 0 ) {
475+ // not ready after timeout: try to cancel this request
467476 if (jl_atomic_cmpswap (& ptls2 -> signal_request , & request , 0 )) {
468477 pthread_mutex_unlock (& in_signal_lock );
469478 return 0 ;
470479 }
471- // Request is either now 0 (meaning the other thread is waiting for
472- // exit_signal_cond already),
473- // Or it is now -1 (meaning the other thread
474- // is waiting for in_signal_lock, and we need to release that lock
475- // here for a bit, until the other thread has a chance to get to the
476- // exit_signal_cond)
477- if (request == -1 ) {
478- err = pthread_cond_wait (& signal_caught_cond , & in_signal_lock );
479- assert (!err );
480- }
481480 }
481+ eventfd_t got ;
482+ do {
483+ err = read (signal_caught_cond , & got , sizeof (eventfd_t ));
484+ } while (err == -1 && errno == EINTR );
485+ if (err != sizeof (eventfd_t )) abort ();
486+ assert (got == 1 ); (void ) got ;
482487 // Now the other thread is waiting on exit_signal_cond (verify that here by
483488 // checking it is 0, and add an acquire barrier for good measure)
484- int request = jl_atomic_load_acquire (& ptls2 -> signal_request );
489+ request = jl_atomic_load_acquire (& ptls2 -> signal_request );
485490 assert (request == 0 ); (void ) request ;
486- jl_atomic_store_release (& ptls2 -> signal_request , 1 ); // prepare to resume normally
491+ jl_atomic_store_release (& ptls2 -> signal_request , 4 ); // prepare to resume normally, but later code may change this
487492 * ctx = * signal_context ;
488493 return 1 ;
489494}
490495
491496void jl_thread_resume (int tid )
492497{
493- jl_ptls_t ptls2 = jl_atomic_load_relaxed (& jl_all_tls_states )[tid ];
494- pthread_cond_broadcast (& exit_signal_cond );
495- pthread_cond_wait (& signal_caught_cond , & in_signal_lock ); // wait for thread to acknowledge (so that signal_request doesn't get mixed up)
496- // The other thread is waiting to leave exit_signal_cond (verify that here by
497- // checking it is 0, and add an acquire barrier for good measure)
498- int request = jl_atomic_load_acquire (& ptls2 -> signal_request );
499- assert (request == 0 ); (void ) request ;
498+ int err ;
499+ eventfd_t got = 1 ;
500+ err = write (exit_signal_cond , & got , sizeof (eventfd_t ));
501+ if (err != sizeof (eventfd_t )) abort ();
500502 pthread_mutex_unlock (& in_signal_lock );
501503}
502- #endif
503504
504505// Throw jl_interrupt_exception if the master thread is in a signal async region
505506// or if SIGINT happens too often.
@@ -508,9 +509,11 @@ static void jl_try_deliver_sigint(void)
508509 jl_ptls_t ptls2 = jl_atomic_load_relaxed (& jl_all_tls_states )[0 ];
509510 jl_safepoint_enable_sigint ();
510511 jl_wake_libuv ();
512+ pthread_mutex_lock (& in_signal_lock );
511513 jl_atomic_store_release (& ptls2 -> signal_request , 2 );
512514 // This also makes sure `sleep` is aborted.
513515 pthread_kill (ptls2 -> system_id , SIGUSR2 );
516+ pthread_mutex_unlock (& in_signal_lock );
514517}
515518
516519// Write only by signal handling thread, read only by main thread
@@ -543,12 +546,12 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
543546}
544547
545548// request:
546- // -1: beginning processing [invalid outside here]
547549// 0: nothing [not from here]
548- // 1: get state
550+ // 1: get state & wait for request
549551// 2: throw sigint if `!defer_signal && io_wait` or if force throw threshold
550552// is reached
551553// 3: raise `thread0_exit_signo` and try to exit
554+ // 4: no-op
552555void usr2_handler (int sig , siginfo_t * info , void * ctx )
553556{
554557 jl_task_t * ct = jl_get_current_task ();
@@ -559,25 +562,21 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
559562 return ;
560563 int errno_save = errno ;
561564 // acknowledge that we saw the signal_request
562- sig_atomic_t request = jl_atomic_exchange (& ptls -> signal_request , -1 );
563- #if !defined(JL_DISABLE_LIBUNWIND )
565+ sig_atomic_t request = jl_atomic_exchange (& ptls -> signal_request , 0 );
564566 if (request == 1 ) {
565- pthread_mutex_lock (& in_signal_lock );
566567 signal_context = jl_to_bt_context (ctx );
567- // acknowledge that we set the signal_caught_cond broadcast
568+ int err ;
569+ eventfd_t got = 1 ;
570+ err = write (signal_caught_cond , & got , sizeof (eventfd_t ));
571+ if (err != sizeof (eventfd_t )) abort ();
572+ do {
573+ err = read (exit_signal_cond , & got , sizeof (eventfd_t ));
574+ } while (err == -1 && errno == EINTR );
575+ if (err != sizeof (eventfd_t )) abort ();
576+ assert (got == 1 );
568577 request = jl_atomic_exchange (& ptls -> signal_request , 0 );
569- assert (request == -1 ); (void ) request ;
570- pthread_cond_broadcast (& signal_caught_cond );
571- pthread_cond_wait (& exit_signal_cond , & in_signal_lock );
572- request = jl_atomic_exchange (& ptls -> signal_request , 0 );
573- assert (request == 1 || request == 3 );
574- // acknowledge that we got the resume signal
575- pthread_cond_broadcast (& signal_caught_cond );
576- pthread_mutex_unlock (& in_signal_lock );
578+ assert (request == 2 || request == 3 || request == 4 );
577579 }
578- else
579- #endif
580- jl_atomic_exchange (& ptls -> signal_request , 0 ); // returns -1
581580 if (request == 2 ) {
582581 int force = jl_check_force_sigint ();
583582 if (force || (!ptls -> defer_signal && ptls -> io_wait )) {
@@ -1038,10 +1037,12 @@ void restore_signals(void)
10381037 jl_sigsetset (& sset );
10391038 pthread_sigmask (SIG_SETMASK , & sset , 0 );
10401039
1041- #if !defined(HAVE_MACH ) && !defined(JL_DISABLE_LIBUNWIND )
1040+ #if !defined(HAVE_MACH )
1041+ exit_signal_cond = eventfd (0 , EFD_CLOEXEC );
1042+ signal_caught_cond = eventfd (0 , EFD_CLOEXEC );
10421043 if (pthread_mutex_init (& in_signal_lock , NULL ) != 0 ||
1043- pthread_cond_init ( & exit_signal_cond , NULL ) != 0 ||
1044- pthread_cond_init ( & signal_caught_cond , NULL ) != 0 ) {
1044+ exit_signal_cond == -1 ||
1045+ signal_caught_cond == -1 ) {
10451046 jl_error ("SIGUSR pthread init failed" );
10461047 }
10471048#endif
0 commit comments