From 41d592fd4383ebbdabbeff308eeecf70377a75e7 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 20 Feb 2009 12:11:48 +0200 Subject: [PATCH] Refactor, so that the system isn't opened for business until startup process has died. --- src/backend/access/transam/xlog.c | 2 +- src/backend/postmaster/postmaster.c | 281 +++++++++++----------------- src/backend/storage/ipc/ipc.c | 4 - src/backend/storage/ipc/pmsignal.c | 17 ++ src/include/storage/pmsignal.h | 1 + 5 files changed, 125 insertions(+), 180 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 97fb14868a..6c0b5f116e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7695,7 +7695,7 @@ StartupProcessMain(void) BuildFlatFiles(false); /* Let postmaster know that startup is finished */ - SendPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED); + SetPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED); /* exit normally */ proc_exit(0); diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 70d9ca246c..80460d357d 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -227,14 +227,6 @@ static int Shutdown = NoShutdown; static bool FatalError = false; /* T if recovering from backend crash */ static bool RecoveryError = false; /* T if recovery failed */ -/* State of WAL redo */ -#define NoRecovery 0 -#define RecoveryStarted 1 -#define RecoveryConsistent 2 -#define RecoveryCompleted 3 - -static int RecoveryStatus = NoRecovery; - /* * We use a simple state machine to control startup, shutdown, and * crash recovery (which is rather like shutdown followed by startup). @@ -253,9 +245,12 @@ static int RecoveryStatus = NoRecovery; * point, if we had the infrastructure to do that. * * When the WAL redo is finished, the startup process signals us the third - * time, and we switch to PM_RUN state. 
The startup process can also skip the - * recovery and consistent recovery phases altogether, as it will during - * normal startup when there's no recovery to be done, for example. + * time, and exits. We don't process the 3rd signal immediately but when we + * see that the startup process has exited, we check that we have + * received the signal. If everything is OK, we then switch to PM_RUN state. + * The startup process can also skip the recovery and consistent recovery + * phases altogether, as it will during normal startup when there's no + * recovery to be done, for example. * * Normal child backends can only be launched when we are in PM_RUN state. * (We also allow it in PM_WAIT_BACKUP state, but only for superusers.) @@ -338,7 +333,6 @@ static void pmdie(SIGNAL_ARGS); static void reaper(SIGNAL_ARGS); static void sigusr1_handler(SIGNAL_ARGS); static void dummy_handler(SIGNAL_ARGS); -static void CheckRecoverySignals(void); static void CleanupBackend(int pid, int exitstatus); static void HandleChildCrash(int pid, int exitstatus, const char *procname); static void LogChildExit(int lev, const char *procname, @@ -2019,7 +2013,8 @@ pmdie(SIGNAL_ARGS) ereport(LOG, (errmsg("received smart shutdown request"))); - if (pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT) + if (pmState == PM_RUN || pmState == PM_RECOVERY || + pmState == PM_RECOVERY_CONSISTENT) { /* autovacuum workers are told to shut down immediately */ SignalAutovacWorkers(SIGTERM); @@ -2159,23 +2154,24 @@ reaper(SIGNAL_ARGS) */ if (pid == StartupPID) { + bool recoveryCompleted; + StartupPID = 0; /* - * Check if we've received a signal from the startup process - * first. This can change pmState. If the startup process sends - * a signal and exits immediately after that, we might not have - * processed the signal yet. We need to know if it completed - * recovery before it exited. 
+ * Check if the startup process completed recovery before exiting */ - CheckRecoverySignals(); + if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED)) + recoveryCompleted = true; + else + recoveryCompleted = false; /* * Unexpected exit of startup process (including FATAL exit) * during PM_STARTUP is treated as catastrophic. There is no - * other processes running yet. + * other processes running yet, so we can just exit. */ - if (pmState == PM_STARTUP) + if (pmState == PM_STARTUP && !recoveryCompleted) { LogChildExit(LOG, _("startup process"), pid, exitstatus); @@ -2195,18 +2191,65 @@ reaper(SIGNAL_ARGS) _("startup process")); continue; } + /* + * Startup process exited in response to a shutdown request (or + * it finished normally regardless of the shutdown request). + */ + if (Shutdown > NoShutdown) + { + pmState = PM_WAIT_BACKENDS; + /* PostmasterStateMachine logic does the rest */ + continue; + } /* * Startup process exited normally, but didn't finish recovery. * This can happen if someone else than postmaster kills the * startup process with SIGTERM. Treat it like a crash. */ - if (pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT) + if (!recoveryCompleted) { RecoveryError = true; HandleChildCrash(pid, exitstatus, _("startup process")); continue; } + + /* + * Startup succeeded, commence normal operations + */ + pmState = PM_RUN; + + /* + * Load the flat authorization file into postmaster's cache. The + * startup process has recomputed this from the database contents, + * so we wait till it finishes before loading it. + */ + load_role(); + + /* + * Crank up the background writer, if we didn't do that already + * when we entered consistent recovery phase. It doesn't matter + * if this fails, we'll just try again later. + */ + if (BgWriterPID == 0) + BgWriterPID = StartBackgroundWriter(); + + /* + * Likewise, start other special children as needed. In a restart + * situation, some of them may be alive already. 
+ */ + if (WalWriterPID == 0) + WalWriterPID = StartWalWriter(); + if (AutoVacuumingActive() && AutoVacPID == 0) + AutoVacPID = StartAutoVacLauncher(); + if (XLogArchivingActive() && PgArchPID == 0) + PgArchPID = pgarch_start(); + if (PgStatPID == 0) + PgStatPID = pgstat_start(); + + /* at this point we are really open for business */ + ereport(LOG, + (errmsg("database system is ready to accept connections"))); } /* @@ -2622,127 +2665,6 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) static void PostmasterStateMachine(void) { - /* Startup states */ - - if (pmState == PM_STARTUP && RecoveryStatus > NoRecovery) - { - /* WAL redo has started. We're out of reinitialization. */ - FatalError = false; - - /* - * Go to shutdown mode if a shutdown request was pending. - */ - if (Shutdown > NoShutdown) - { - pmState = PM_WAIT_BACKENDS; - /* PostmasterStateMachine logic does the rest */ - } - else - { - /* - * Crank up the background writer. It doesn't matter if this - * fails, we'll just try again later. - */ - Assert(BgWriterPID == 0); - BgWriterPID = StartBackgroundWriter(); - - pmState = PM_RECOVERY; - } - } - if (pmState == PM_RECOVERY && RecoveryStatus >= RecoveryConsistent) - { - /* - * Go to shutdown mode if a shutdown request was pending. - */ - if (Shutdown > NoShutdown) - { - pmState = PM_WAIT_BACKENDS; - /* PostmasterStateMachine logic does the rest */ - } - else - { - /* - * Startup process has entered recovery. We consider that good - * enough to reset FatalError. - */ - pmState = PM_RECOVERY_CONSISTENT; - - /* - * Load the flat authorization file into postmaster's cache. The - * startup process won't have recomputed this from the database yet, - * so we it may change following recovery. - */ - load_role(); - - /* - * Likewise, start other special children as needed. 
- */ - Assert(PgStatPID == 0); - PgStatPID = pgstat_start(); - - /* XXX at this point we could accept read-only connections */ - ereport(DEBUG1, - (errmsg("database system is in consistent recovery mode"))); - } - } - if ((pmState == PM_RECOVERY || - pmState == PM_RECOVERY_CONSISTENT || - pmState == PM_STARTUP) && - RecoveryStatus == RecoveryCompleted) - { - /* - * Startup succeeded. - * - * Go to shutdown mode if a shutdown request was pending. - */ - if (Shutdown > NoShutdown) - { - pmState = PM_WAIT_BACKENDS; - /* PostmasterStateMachine logic does the rest */ - } - else - { - /* - * Otherwise, commence normal operations. - */ - pmState = PM_RUN; - - /* - * Load the flat authorization file into postmaster's cache. The - * startup process has recomputed this from the database contents, - * so we wait till it finishes before loading it. - */ - load_role(); - - /* - * Crank up the background writer, if we didn't do that already - * when we entered consistent recovery phase. It doesn't matter - * if this fails, we'll just try again later. - */ - if (BgWriterPID == 0) - BgWriterPID = StartBackgroundWriter(); - - /* - * Likewise, start other special children as needed. In a restart - * situation, some of them may be alive already. 
- */ - if (WalWriterPID == 0) - WalWriterPID = StartWalWriter(); - if (AutoVacuumingActive() && AutoVacPID == 0) - AutoVacPID = StartAutoVacLauncher(); - if (XLogArchivingActive() && PgArchPID == 0) - PgArchPID = pgarch_start(); - if (PgStatPID == 0) - PgStatPID = pgstat_start(); - - /* at this point we are really open for business */ - ereport(LOG, - (errmsg("database system is ready to accept connections"))); - } - } - - /* Shutdown states */ - if (pmState == PM_WAIT_BACKUP) { /* @@ -2904,8 +2826,6 @@ PostmasterStateMachine(void) shmem_exit(1); reset_shared(PostPortNumber); - RecoveryStatus = NoRecovery; - StartupPID = StartupDataBase(); Assert(StartupPID != 0); pmState = PM_STARTUP; @@ -4010,47 +3930,58 @@ ExitPostmaster(int status) } /* - * common code used in sigusr1_handler() and reaper() to handle - * recovery-related signals from startup process + * sigusr1_handler - handle signal conditions from child processes */ static void -CheckRecoverySignals(void) +sigusr1_handler(SIGNAL_ARGS) { - bool changed = false; + int save_errno = errno; - if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED)) - { - Assert(pmState == PM_STARTUP); + PG_SETMASK(&BlockSig); - RecoveryStatus = RecoveryStarted; - changed = true; - } - if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT)) + /* + * RECOVERY_STARTED and RECOVERY_CONSISTENT signals are ignored in + * unexpected states. If the startup process quickly starts up, completes + * recovery, exits, we might process the death of the startup process + * first. We don't want to go back to recovery in that case. + */ + if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) && + pmState == PM_STARTUP) { - RecoveryStatus = RecoveryConsistent; - changed = true; + /* WAL redo has started. We're out of reinitialization. */ + FatalError = false; + + /* + * Crank up the background writer. It doesn't matter if this + * fails, we'll just try again later. 
+ */ + Assert(BgWriterPID == 0); + BgWriterPID = StartBackgroundWriter(); + + pmState = PM_RECOVERY; } - if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED)) + if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT) && + pmState == PM_RECOVERY) { - RecoveryStatus = RecoveryCompleted; - changed = true; - } - - if (changed) - PostmasterStateMachine(); -} + /* + * Load the flat authorization file into postmaster's cache. The + * startup process won't have recomputed this from the database yet, + * so it may change following recovery. + */ + load_role(); -/* - * sigusr1_handler - handle signal conditions from child processes - */ -static void -sigusr1_handler(SIGNAL_ARGS) -{ - int save_errno = errno; + /* + * Likewise, start other special children as needed. + */ + Assert(PgStatPID == 0); + PgStatPID = pgstat_start(); - PG_SETMASK(&BlockSig); + /* XXX at this point we could accept read-only connections */ + ereport(DEBUG1, + (errmsg("database system is in consistent recovery mode"))); - CheckRecoverySignals(); + pmState = PM_RECOVERY_CONSISTENT; + } if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE)) { diff --git a/src/backend/storage/ipc/ipc.c b/src/backend/storage/ipc/ipc.c index 13623a3c6a..50ed3e1dc8 100644 --- a/src/backend/storage/ipc/ipc.c +++ b/src/backend/storage/ipc/ipc.c @@ -95,8 +95,6 @@ proc_exit(int code) InterruptHoldoffCount = 1; CritSectionCount = 0; - elog(DEBUG3, "proc_exit(%d)", code); - /* do our shared memory exits first */ shmem_exit(code); @@ -161,8 +159,6 @@ proc_exit(int code) void shmem_exit(int code) { - elog(DEBUG3, "shmem_exit(%d)", code); - /* * call all the registered callbacks. 
* diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c index 00bbbc7b43..2c992c443a 100644 --- a/src/backend/storage/ipc/pmsignal.c +++ b/src/backend/storage/ipc/pmsignal.c @@ -71,6 +71,23 @@ SendPostmasterSignal(PMSignalReason reason) kill(PostmasterPid, SIGUSR1); } +/* + * SetPostmasterSignal - like SendPostmasterSignal, but don't wake up + * postmaster + * + * This is for signals that the postmaster polls with CheckPostmasterSignal() + * but isn't interested in processing immediately. + */ +void +SetPostmasterSignal(PMSignalReason reason) +{ + /* If called in a standalone backend, do nothing */ + if (!IsUnderPostmaster) + return; + /* Atomically set the proper flag */ + PMSignalFlags[reason] = true; +} + /* * CheckPostmasterSignal - check to see if a particular reason has been * signaled, and clear the signal flag. Should be called by postmaster diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index 21b1e90f59..490dd92113 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -39,6 +39,7 @@ typedef enum */ extern void PMSignalInit(void); extern void SendPostmasterSignal(PMSignalReason reason); +extern void SetPostmasterSignal(PMSignalReason reason); extern bool CheckPostmasterSignal(PMSignalReason reason); extern bool PostmasterIsAlive(bool amDirectChild); -- 2.39.5