pg_upgrade: detect stale postmaster.pid lock files
author: Bruce Momjian <[email protected]>
Thu, 24 Jan 2013 20:20:11 +0000 (15:20 -0500)
committer: Bruce Momjian <[email protected]>
Thu, 24 Jan 2013 20:20:40 +0000 (15:20 -0500)
If the postmaster.pid lock file exists, try starting/stopping the
cluster to check if the lock file is valid.

Per request from Tom.

contrib/pg_upgrade/check.c
contrib/pg_upgrade/exec.c
contrib/pg_upgrade/pg_upgrade.c
contrib/pg_upgrade/pg_upgrade.h
contrib/pg_upgrade/server.c

index 1780788e003cf20c207623835667e59325c674e7..818864346ea416a648e5f0a330320b7357d87d50 100644 (file)
@@ -56,11 +56,10 @@ fix_path_separator(char *path)
 }
 
 void
-output_check_banner(bool *live_check)
+output_check_banner(bool live_check)
 {
-       if (user_opts.check && is_server_running(old_cluster.pgdata))
+       if (user_opts.check && live_check)
        {
-               *live_check = true;
                pg_log(PG_REPORT, "Performing Consistency Checks on Old Live Server\n");
                pg_log(PG_REPORT, "------------------------------------------------\n");
        }
@@ -78,7 +77,7 @@ check_and_dump_old_cluster(bool live_check, char **sequence_script_file_name)
        /* -- OLD -- */
 
        if (!live_check)
-               start_postmaster(&old_cluster);
+               start_postmaster(&old_cluster, true);
 
        set_locale_and_encoding(&old_cluster);
 
@@ -201,7 +200,7 @@ issue_warnings(char *sequence_script_file_name)
        /* old = PG 8.3 warnings? */
        if (GET_MAJOR_VERSION(old_cluster.major_version) <= 803)
        {
-               start_postmaster(&new_cluster);
+               start_postmaster(&new_cluster, true);
 
                /* restore proper sequence values using file created from old server */
                if (sequence_script_file_name)
@@ -224,7 +223,7 @@ issue_warnings(char *sequence_script_file_name)
        /* Create dummy large object permissions for old < PG 9.0? */
        if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
        {
-               start_postmaster(&new_cluster);
+               start_postmaster(&new_cluster, true);
                new_9_0_populate_pg_largeobject_metadata(&new_cluster, false);
                stop_postmaster(false);
        }
index e326a10c6130afedc39031be13a060ce210076a0..44dafc36a9159933a9fc034d755c3f81d42fd87c 100644 (file)
@@ -140,13 +140,12 @@ exec_prog(const char *log_file, const char *opt_log_file,
 
 
 /*
- * is_server_running()
+ * pid_lock_file_exists()
  *
- * checks whether postmaster on the given data directory is running or not.
- * The check is performed by looking for the existence of postmaster.pid file.
+ * Checks whether the postmaster.pid file exists.
  */
 bool
-is_server_running(const char *datadir)
+pid_lock_file_exists(const char *datadir)
 {
        char            path[MAXPGPATH];
        int                     fd;
@@ -180,8 +179,6 @@ void
 verify_directories(void)
 {
 
-       prep_status("Checking current, bin, and data directories");
-
 #ifndef WIN32
        if (access(".", R_OK | W_OK | X_OK) != 0)
 #else
@@ -194,7 +191,6 @@ verify_directories(void)
        check_data_dir(old_cluster.pgdata);
        check_bin_dir(&new_cluster);
        check_data_dir(new_cluster.pgdata);
-       check_ok();
 }
 
 
index a752fe8eda1a260c8332f6765706ad416e920a70..55155d22e287ed2c79b544feaae6b3993e8b71d8 100644 (file)
@@ -48,7 +48,7 @@ static void prepare_new_databases(void);
 static void create_new_objects(void);
 static void copy_clog_xlog_xid(void);
 static void set_frozenxids(void);
-static void setup(char *argv0, bool live_check);
+static void setup(char *argv0, bool *live_check);
 static void cleanup(void);
 
 ClusterInfo old_cluster,
@@ -80,9 +80,9 @@ main(int argc, char **argv)
        adjust_data_dir(&old_cluster);
        adjust_data_dir(&new_cluster);
 
-       output_check_banner(&live_check);
+       setup(argv[0], &live_check);
 
-       setup(argv[0], live_check);
+       output_check_banner(live_check);
 
        check_cluster_versions();
 
@@ -95,7 +95,7 @@ main(int argc, char **argv)
 
 
        /* -- NEW -- */
-       start_postmaster(&new_cluster);
+       start_postmaster(&new_cluster, true);
 
        check_new_cluster();
        report_clusters_compatible();
@@ -116,7 +116,7 @@ main(int argc, char **argv)
        /* New now using xids of the old system */
 
        /* -- NEW -- */
-       start_postmaster(&new_cluster);
+       start_postmaster(&new_cluster, true);
 
        prepare_new_databases();
 
@@ -177,7 +177,7 @@ main(int argc, char **argv)
 
 
 static void
-setup(char *argv0, bool live_check)
+setup(char *argv0, bool *live_check)
 {
        char            exec_path[MAXPGPATH];   /* full path to my executable */
 
@@ -189,15 +189,39 @@ setup(char *argv0, bool live_check)
 
        verify_directories();
 
-       /* no postmasters should be running */
-       if (!live_check && is_server_running(old_cluster.pgdata))
-               pg_log(PG_FATAL, "There seems to be a postmaster servicing the old cluster.\n"
-                          "Please shutdown that postmaster and try again.\n");
+       /* no postmasters should be running, except for a live check */
+       if (pid_lock_file_exists(old_cluster.pgdata))
+       {
+               /*
+                *      If we have a postmaster.pid file, try to start the server.  If
+                *      it starts, the pid file was stale, so stop the server.  If it
+                *      doesn't start, assume the server is running.  If the pid file
+                *      is left over from a server crash, this also allows any committed
+                *      transactions stored in the WAL to be replayed so they are not
+                *      lost, because WAL files are not transferred from old to new
+                *      servers.
+                */             
+               if (start_postmaster(&old_cluster, false))
+                       stop_postmaster(false);
+               else
+               {
+                       if (!user_opts.check)
+                               pg_log(PG_FATAL, "There seems to be a postmaster servicing the old cluster.\n"
+                                          "Please shutdown that postmaster and try again.\n");
+                       else
+                               *live_check = true;
+               }
+       }
 
        /* same goes for the new postmaster */
-       if (is_server_running(new_cluster.pgdata))
-               pg_log(PG_FATAL, "There seems to be a postmaster servicing the new cluster.\n"
+       if (pid_lock_file_exists(new_cluster.pgdata))
+       {
+               if (start_postmaster(&new_cluster, false))
+                       stop_postmaster(false);
+               else
+                       pg_log(PG_FATAL, "There seems to be a postmaster servicing the new cluster.\n"
                           "Please shutdown that postmaster and try again.\n");
+       }
 
        /* get path to pg_upgrade executable */
        if (find_my_exec(argv0, exec_path) < 0)
index 70b93816679800bdf3b1c22cae9d12281e0c6d66..69b9c6030b71fe1cb6810c6db84669bd4b69e6af 100644 (file)
@@ -307,7 +307,7 @@ extern OSInfo os_info;
 
 /* check.c */
 
-void           output_check_banner(bool *live_check);
+void           output_check_banner(bool live_check);
 void           check_and_dump_old_cluster(bool live_check,
                                  char **sequence_script_file_name);
 void           check_new_cluster(void);
@@ -341,7 +341,7 @@ exec_prog(const char *log_file, const char *opt_log_file,
                  bool throw_error, const char *fmt,...)
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 4, 5)));
 void           verify_directories(void);
-bool           is_server_running(const char *datadir);
+bool           pid_lock_file_exists(const char *datadir);
 
 
 /* file.c */
@@ -429,7 +429,7 @@ __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
 
 char      *cluster_conn_opts(ClusterInfo *cluster);
 
-void           start_postmaster(ClusterInfo *cluster);
+bool           start_postmaster(ClusterInfo *cluster, bool throw_error);
 void           stop_postmaster(bool fast);
 uint32         get_major_server_version(ClusterInfo *cluster);
 void           check_pghost_envvar(void);
index 0b4825181bdff6452f34e526cd3488bff4d65c48..d1a3f76eff377a8ce51d3d05666bf3fd48420882 100644 (file)
@@ -170,8 +170,8 @@ stop_postmaster_atexit(void)
 }
 
 
-void
-start_postmaster(ClusterInfo *cluster)
+bool
+start_postmaster(ClusterInfo *cluster, bool throw_error)
 {
        char            cmd[MAXPGPATH * 4 + 1000];
        PGconn     *conn;
@@ -236,6 +236,9 @@ start_postmaster(ClusterInfo *cluster)
                                                          false,
                                                          "%s", cmd);
 
+       if (!pg_ctl_return && !throw_error)
+               return false;
+                                                         
        /* Check to see if we can connect to the server; if not, report it. */
        if ((conn = get_db_conn(cluster, "template1")) == NULL ||
                PQstatus(conn) != CONNECTION_OK)
@@ -256,6 +259,8 @@ start_postmaster(ClusterInfo *cluster)
                           CLUSTER_NAME(cluster));
 
        os_info.running_cluster = cluster;
+
+       return true;
 }