From 2bc1061b05597e90377ef665057b9eb95a349a64 Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Tue, 3 Jun 2014 17:00:11 +1000 Subject: [PATCH] Fix: mainloop: Better handle the killing of processes in the act of exiting Also ensure that calling mainloop_child_kill() with a pid not in 'child_list' does not result in the last known child being killed. --- lib/common/mainloop.c | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c index 69b4382b4b5..8a8b63fb44e 100644 --- a/lib/common/mainloop.c +++ b/lib/common/mainloop.c @@ -879,9 +879,7 @@ static int child_kill_helper(mainloop_child_t *child) { if (kill(-child->pid, SIGKILL) < 0) { - if (child->timeout) { - /* only show this warning if this is the second time - * we're attempting to kill the pid after it has already timed out. */ + if (errno != ESRCH) { crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid); } return -errno; @@ -927,6 +925,7 @@ child_waitpid(mainloop_child_t *child, int flags) rc = waitpid(child->pid, &status, flags); if(rc == 0) { + crm_perror(LOG_DEBUG, "wait(%d) = %d", child->pid, rc); return FALSE; } else if(rc != child->pid) { @@ -988,6 +987,7 @@ child_death_dispatch(int signal) static gboolean child_signal_init(gpointer p) { + crm_trace("Installed SIGCHLD handler"); /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */ mainloop_add_signal(SIGCHLD, child_death_dispatch); @@ -996,40 +996,55 @@ child_signal_init(gpointer p) return FALSE; } -gboolean +int mainloop_child_kill(pid_t pid) { GListPtr iter; mainloop_child_t *child = NULL; + mainloop_child_t *match = NULL; /* It is impossible to block SIGKILL, this allows us to * call waitpid without WNOHANG flag.*/ - int waitflags = 0; + int waitflags = 0, rc = 0; - for (iter = child_list; iter != NULL; iter = iter->next) { + for (iter = child_list; iter != NULL && match == NULL; iter = iter->next) { child = iter->data; if (pid == child->pid) { - break; + match = child; } } - if (child == NULL) { + if (match == NULL) { return FALSE; } - if (child_kill_helper(child) != 0) { - /* If KILL failed, likely the pid has already exited. - * We'll need to set the WNOHANG flag since we can't be - * certain though. */ + rc = child_kill_helper(match); + if(rc == -ESRCH) { + /* Its gone, but hasn't shown up in waitpid() yet + * + * Wait until we get SIGCHLD and let child_death_dispatch() + * clean it up as normal (so we get the correct return + * code/status) + * + * The blocking alternative would be to call: + * child_waitpid(match, 0); + */ + crm_trace("Waiting for child %d to be reaped by child_death_dispatch()", match->pid); + return TRUE; + + } else if(rc != 0) { + /* If KILL for some other reason set the WNOHANG flag since we + * can't be certain what happened. + */ waitflags = WNOHANG; } - if (child_waitpid(child, waitflags) == FALSE) { + if (child_waitpid(match, waitflags) == FALSE) { /* not much we can do if this occurs */ return FALSE; } - child_list = g_list_remove(child_list, child); - child_free(child); + child_list = g_list_remove(child_list, match); + child_free(match); return TRUE; }