|
1 | 1 | /* |
2 | | - Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. |
| 2 | + Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. |
3 | 3 |
|
4 | 4 | This program is free software; you can redistribute it and/or modify |
5 | 5 | it under the terms of the GNU General Public License as published by |
|
16 | 16 | */ |
17 | 17 |
|
18 | 18 | #include <ndb_global.h> |
| 19 | +#include "NdbSleep.h" |
19 | 20 |
|
20 | 21 | #ifdef _WIN32 |
21 | 22 | #include <io.h> |
@@ -144,6 +145,7 @@ void CPCD::Process::monitor() |
144 | 145 | { |
145 | 146 | m_pid = bad_pid; |
146 | 147 | m_status = STOPPED; |
| 148 | + removePid(); |
147 | 149 | } |
148 | 150 | else if (time(NULL) > m_stopping_time + m_stop_timeout) |
149 | 151 | { |
@@ -589,6 +591,32 @@ int CPCD::Process::start() { |
589 | 591 | * take care of that. |
590 | 592 | */ |
591 | 593 | logger.info("Starting %d: %s", m_id, m_name.c_str()); |
| 594 | + |
| 595 | + /* Check whether a leftover pid file exists. |
| 596 | + * If so, and a process is running with the recorded pid, leave it running and fail to start the new process. |
| 597 | + * If no process is running with the recorded pid, remove the pid file. |
| 598 | + */ |
| 599 | + if (readPid() >= 0) { |
| 600 | + if (isRunning()) { |
| 601 | + logger.error("Fail starting %d. Old pid file found. Leave running " |
| 602 | + "process (pid %d) running.\n", |
| 603 | + m_id, |
| 604 | + m_pid); |
| 605 | + m_status = STOPPED; |
| 606 | + m_pid = bad_pid; |
| 607 | + return -1; |
| 608 | + } |
| 609 | + else { |
| 610 | + logger.info("While starting %d. Found old pid file with no running " |
| 611 | + "process (pid %d). Removing pid file!\n", |
| 612 | + m_id, |
| 613 | + m_pid); |
| 614 | + m_status = STOPPED; |
| 615 | + m_pid = bad_pid; |
| 616 | + removePid(); |
| 617 | + } |
| 618 | + } |
| 619 | + |
592 | 620 | m_status = STARTING; |
593 | 621 |
|
594 | 622 | int pid = -1; |
@@ -670,15 +698,59 @@ int CPCD::Process::start() { |
670 | 698 | return -1; |
671 | 699 | } |
672 | 700 |
|
673 | | - while (readPid() < 0) { |
674 | | - sched_yield(); |
675 | | - } |
| 701 | + const int max_retries = 3; |
| 702 | + for (int retries = max_retries; retries > 0; retries--) { |
| 703 | + while (readPid() < 0) { |
| 704 | + sched_yield(); |
| 705 | + } |
676 | 706 |
|
677 | | - errno = 0; |
678 | | - pid_t pgid = IF_WIN(-1, getpgid(pid)); |
| 707 | + errno = 0; |
| 708 | + pid_t pgid = IF_WIN(-1, getpgid(pid)); |
| 709 | + |
| 710 | + if (pgid == -1 || pgid == m_pid) { |
| 711 | + if (retries < max_retries) |
| 712 | + { |
| 713 | + logger.info("Retry reading pid file succeeded: cpcd pid %d: forked " |
| 714 | + "pgid %d pid %d: file m_pid %d", |
| 715 | + getpid(), |
| 716 | + pgid, |
| 717 | + pid, |
| 718 | + m_pid); |
| 719 | + } |
| 720 | + break; |
| 721 | + } |
| 722 | + |
| 723 | + /* retry */ |
| 724 | + |
| 725 | + // For process type PERMANENT, pid and pgid must both be -1, so this point is never reached. |
| 726 | + require(m_processType == TEMPORARY); |
| 727 | + logger.error("pgid and m_pid don't match: cpcd pid %d: forked pgid %d " |
| 728 | + "pid %d: file m_pid %d", |
| 729 | + getpid(), |
| 730 | + pgid, |
| 731 | + pid, |
| 732 | + m_pid); |
| 733 | + |
| 734 | + if (retries == 1) { |
| 735 | + /* The last attempt to read the pid file failed. |
| 736 | + * For TEMPORARY, where the pid of the started process is known, kill it. |
| 737 | + */ |
| 738 | +#ifndef _WIN32 |
| 739 | + logger.error("After pid file mismatch, forced kill of forked process " |
| 740 | + "group (pgid %d).", |
| 741 | + pgid); |
| 742 | + kill(-pgid, 9); |
| 743 | +#endif |
| 744 | + logger.error("After pid file mismatch, stop started process %d " |
| 745 | + "(pid %d).", |
| 746 | + m_id, |
| 747 | + m_pid); |
| 748 | + stop(); |
| 749 | + return -1; |
| 750 | + } |
679 | 751 |
|
680 | | - if (pgid != -1 && pgid != m_pid) { |
681 | | - logger.error("pgid and m_pid don't match: %d %d (%d)", pgid, m_pid, pid); |
| 752 | + m_pid = bad_pid; |
| 753 | + NdbSleep_SecSleep(1); |
682 | 754 | } |
683 | 755 |
|
684 | 756 | if (isRunning()) |
|
0 commit comments