<!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/pg_resetxlog.sgml,v 1.18 2006/09/16 00:30:19 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/pg_resetxlog.sgml,v 1.19 2006/12/08 19:50:52 tgl Exp $
 PostgreSQL documentation
 -->
 
    <literal>-f</> can still be used, but
    the recovered database must be treated with even more suspicion than
    usual: an immediate dump and reload is imperative.  <emphasis>Do not</>
-   execute any data-modifying operations in the database before you dump;
+   execute any data-modifying operations in the database before you dump,
    as any such action is likely to make the corruption worse.
   </para>
 
     <listitem>
      <para>
       The WAL starting address (<literal>-l</>) should be
-      larger than any file name currently existing in
+      larger than any WAL segment file name currently existing in
       the directory <filename>pg_xlog</> under the data directory.
       These names are also in hexadecimal and have three parts.  The first
       part is the <quote>timeline ID</> and should usually be kept the same.
       <filename>000000010000003A000000FF</>, choose <literal>-l 0x1,0x3B,0x0</>
       or more.
      </para>
+
+     <note>
+      <para>
+       <command>pg_resetxlog</command> itself looks at the files in
+       <filename>pg_xlog</> and chooses a default <literal>-l</> setting
+       beyond the last existing file name.  Therefore, manual adjustment of
+       <literal>-l</> should only be needed if you are aware of WAL segment
+       files that are not currently present in <filename>pg_xlog</>, such as
+       entries in an offline archive; or if the contents of
+       <filename>pg_xlog</> have been lost entirely.
+      </para>
+     </note>
     </listitem>
 
     <listitem>
 
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.258 2006/11/30 18:29:11 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.259 2006/12/08 19:50:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
            openLogFile = XLogFileInit(openLogId, openLogSeg,
                                       &use_existent, true);
            openLogOff = 0;
-
-           /* update pg_control, unless someone else already did */
-           LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-           if (ControlFile->logId < openLogId ||
-               (ControlFile->logId == openLogId &&
-                ControlFile->logSeg < openLogSeg + 1))
-           {
-               ControlFile->logId = openLogId;
-               ControlFile->logSeg = openLogSeg + 1;
-               ControlFile->time = time(NULL);
-               UpdateControlFile();
-
-               /*
-                * Signal bgwriter to start a checkpoint if it's been too long
-                * since the last one.  (We look at local copy of RedoRecPtr
-                * which might be a little out of date, but should be close
-                * enough for this purpose.)
-                *
-                * A straight computation of segment number could overflow 32
-                * bits.  Rather than assuming we have working 64-bit
-                * arithmetic, we compare the highest-order bits separately,
-                * and force a checkpoint immediately when they change.
-                */
-               if (IsUnderPostmaster)
-               {
-                   uint32      old_segno,
-                               new_segno;
-                   uint32      old_highbits,
-                               new_highbits;
-
-                   old_segno = (RedoRecPtr.xlogid % XLogSegSize) * XLogSegsPerFile +
-                       (RedoRecPtr.xrecoff / XLogSegSize);
-                   old_highbits = RedoRecPtr.xlogid / XLogSegSize;
-                   new_segno = (openLogId % XLogSegSize) * XLogSegsPerFile +
-                       openLogSeg;
-                   new_highbits = openLogId / XLogSegSize;
-                   if (new_highbits != old_highbits ||
-                       new_segno >= old_segno + (uint32) CheckPointSegments)
-                   {
-#ifdef WAL_DEBUG
-                       if (XLOG_DEBUG)
-                           elog(LOG, "time for a checkpoint, signaling bgwriter");
-#endif
-                       RequestCheckpoint(false, true);
-                   }
-               }
-           }
-           LWLockRelease(ControlFileLock);
        }
 
        /* Make sure we have the current logfile open */
             *
             * This is also the right place to notify the Archiver that the
             * segment is ready to copy to archival storage, and to update the
-            * timer for archive_timeout.
+            * timer for archive_timeout, and to signal for a checkpoint if
+            * too many logfile segments have been used since the last
+            * checkpoint.
             */
            if (finishing_seg || (xlog_switch && last_iteration))
            {
                    XLogArchiveNotifySeg(openLogId, openLogSeg);
 
                Write->lastSegSwitchTime = time(NULL);
+
+               /*
+                * Signal bgwriter to start a checkpoint if it's been too long
+                * since the last one.  (We look at local copy of RedoRecPtr
+                * which might be a little out of date, but should be close
+                * enough for this purpose.)
+                *
+                * A straight computation of segment number could overflow 32
+                * bits.  Rather than assuming we have working 64-bit
+                * arithmetic, we compare the highest-order bits separately,
+                * and force a checkpoint immediately when they change.
+                */
+               if (IsUnderPostmaster)
+               {
+                   uint32      old_segno,
+                               new_segno;
+                   uint32      old_highbits,
+                               new_highbits;
+
+                   old_segno = (RedoRecPtr.xlogid % XLogSegSize) * XLogSegsPerFile +
+                       (RedoRecPtr.xrecoff / XLogSegSize);
+                   old_highbits = RedoRecPtr.xlogid / XLogSegSize;
+                   new_segno = (openLogId % XLogSegSize) * XLogSegsPerFile +
+                       openLogSeg;
+                   new_highbits = openLogId / XLogSegSize;
+                   if (new_highbits != old_highbits ||
+                       new_segno >= old_segno + (uint32) (CheckPointSegments-1))
+                   {
+#ifdef WAL_DEBUG
+                       if (XLOG_DEBUG)
+                           elog(LOG, "time for a checkpoint, signaling bgwriter");
+#endif
+                       RequestCheckpoint(false, true);
+                   }
+               }
            }
        }
 
    ControlFile->system_identifier = sysidentifier;
    ControlFile->state = DB_SHUTDOWNED;
    ControlFile->time = checkPoint.time;
-   ControlFile->logId = 0;
-   ControlFile->logSeg = 1;
    ControlFile->checkPoint = checkPoint.redo;
    ControlFile->checkPointCopy = checkPoint;
    /* some additional ControlFile fields are set in WriteControlFile() */
     */
    ReadControlFile();
 
-   if (ControlFile->logSeg == 0 ||
-       ControlFile->state < DB_SHUTDOWNED ||
+   if (ControlFile->state < DB_SHUTDOWNED ||
        ControlFile->state > DB_IN_PRODUCTION ||
        !XRecOffIsValid(ControlFile->checkPoint.xrecoff))
        ereport(FATAL,
                        str_time(ControlFile->time))));
    else if (ControlFile->state == DB_SHUTDOWNING)
        ereport(LOG,
-               (errmsg("database system shutdown was interrupted at %s",
+               (errmsg("database system shutdown was interrupted; last known up at %s",
                        str_time(ControlFile->time))));
    else if (ControlFile->state == DB_IN_CRASH_RECOVERY)
        ereport(LOG,
                " and you may need to choose an earlier recovery target.")));
    else if (ControlFile->state == DB_IN_PRODUCTION)
        ereport(LOG,
-               (errmsg("database system was interrupted at %s",
+               (errmsg("database system was interrupted; last known up at %s",
                        str_time(ControlFile->time))));
 
    /* This is just to allow attaching to startup process with a debugger */
    openLogSeg = endLogSeg;
    openLogFile = XLogFileOpen(openLogId, openLogSeg);
    openLogOff = 0;
-   ControlFile->logId = openLogId;
-   ControlFile->logSeg = openLogSeg + 1;
    Insert = &XLogCtl->Insert;
    Insert->PrevRecord = LastRec;
    XLogCtl->xlblocks[0].xlogid = openLogId;
 
  * licence: BSD
  *
- * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.31 2006/08/21 16:16:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.32 2006/12/08 19:50:53 tgl Exp $
  */
 #include "postgres.h"
 
           dbState(ControlFile.state));
    printf(_("pg_control last modified:             %s\n"),
           pgctime_str);
-   printf(_("Current log file ID:                  %u\n"),
-          ControlFile.logId);
-   printf(_("Next log file segment:                %u\n"),
-          ControlFile.logSeg);
    printf(_("Latest checkpoint location:           %X/%X\n"),
           ControlFile.checkPoint.xlogid,
           ControlFile.checkPoint.xrecoff);
 
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.53 2006/10/04 00:30:05 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.54 2006/12/08 19:50:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 static void GuessControlValues(void);
 static void PrintControlValues(bool guessed);
 static void RewriteControlFile(void);
+static void FindEndOfXLOG(void);
 static void KillExistingXLOG(void);
 static void WriteEmptyXLOG(void);
 static void usage(void);
    if (!ReadControlFile())
        GuessControlValues();
 
+   /*
+    * Also look at existing segment files to set up newXlogId/newXlogSeg
+    */
+   FindEndOfXLOG();
+
    /*
     * Adjust fields if required by switches.  (Do this now so that printout,
     * if any, includes these values.)
    if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
        ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
 
-   if (minXlogId > ControlFile.logId ||
-       (minXlogId == ControlFile.logId &&
-        minXlogSeg > ControlFile.logSeg))
+   if (minXlogId > newXlogId ||
+       (minXlogId == newXlogId &&
+        minXlogSeg > newXlogSeg))
    {
-       ControlFile.logId = minXlogId;
-       ControlFile.logSeg = minXlogSeg;
+       newXlogId = minXlogId;
+       newXlogSeg = minXlogSeg;
    }
 
    /*
 
    ControlFile.state = DB_SHUTDOWNED;
    ControlFile.time = time(NULL);
-   ControlFile.logId = 0;
-   ControlFile.logSeg = 1;
    ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
 
    ControlFile.maxAlign = MAXIMUM_ALIGNOF;
    snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
             ControlFile.system_identifier);
 
+   printf(_("First log file ID for new XLOG:       %u\n"),
+          newXlogId);
+   printf(_("First log file segment for new XLOG:  %u\n"),
+          newXlogSeg);
    printf(_("pg_control version number:            %u\n"),
           ControlFile.pg_control_version);
    printf(_("Catalog version number:               %u\n"),
           ControlFile.catalog_version_no);
    printf(_("Database system identifier:           %s\n"),
           sysident_str);
-   printf(_("Current log file ID:                  %u\n"),
-          ControlFile.logId);
-   printf(_("Next log file segment:                %u\n"),
-          ControlFile.logSeg);
    printf(_("Latest checkpoint's TimeLineID:       %u\n"),
           ControlFile.checkPointCopy.ThisTimeLineID);
    printf(_("Latest checkpoint's NextXID:          %u/%u\n"),
    char        buffer[PG_CONTROL_SIZE];        /* need not be aligned */
 
    /*
-    * Adjust fields as needed to force an empty XLOG starting at the next
-    * available segment.
+    * Adjust fields as needed to force an empty XLOG starting at
+    * newXlogId/newXlogSeg.
     */
-   newXlogId = ControlFile.logId;
-   newXlogSeg = ControlFile.logSeg;
-
-   /* adjust in case we are changing segment size */
-   newXlogSeg *= ControlFile.xlog_seg_size;
-   newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
-
-   /* be sure we wrap around correctly at end of a logfile */
-   NextLogSeg(newXlogId, newXlogSeg);
-
-   /* Now we can force the recorded xlog seg size to the right thing. */
-   ControlFile.xlog_seg_size = XLogSegSize;
-
    ControlFile.checkPointCopy.redo.xlogid = newXlogId;
    ControlFile.checkPointCopy.redo.xrecoff =
        newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
 
    ControlFile.state = DB_SHUTDOWNED;
    ControlFile.time = time(NULL);
-   ControlFile.logId = newXlogId;
-   ControlFile.logSeg = newXlogSeg + 1;
    ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
    ControlFile.prevCheckPoint.xlogid = 0;
    ControlFile.prevCheckPoint.xrecoff = 0;
    ControlFile.minRecoveryPoint.xlogid = 0;
    ControlFile.minRecoveryPoint.xrecoff = 0;
 
+   /* Now we can force the recorded xlog seg size to the right thing. */
+   ControlFile.xlog_seg_size = XLogSegSize;
+
    /* Contents are protected with a CRC */
    INIT_CRC32(ControlFile.crc);
    COMP_CRC32(ControlFile.crc,
 }
 
 
+/*
+ * Scan existing XLOG files and determine the highest existing WAL address
+ *
+ * On entry, ControlFile.checkPointCopy.redo and ControlFile.xlog_seg_size
+ * are assumed valid (note that we allow the old xlog seg size to differ
+ * from what we're using).  On exit, newXlogId and newXlogSeg are set to
+ * suitable values for the beginning of replacement WAL (in our seg size).
+ *
+ * The function takes no arguments and returns nothing; its results are
+ * delivered through newXlogId and newXlogSeg.  If XLOGDIR cannot be opened
+ * or read, a message is written to stderr and the program exits with
+ * status 1.
+ *
+ * NOTE(review): ControlFile.xlog_seg_size is used as a divisor below, so a
+ * zero value would be a division by zero -- confirm that every path that
+ * fills in ControlFile guarantees a nonzero segment size.
+ */
+static void
+FindEndOfXLOG(void)
+{
+   DIR        *xldir;
+   struct dirent *xlde;
+
+   /*
+    * Initialize the max() computation using the last checkpoint address
+    * from old pg_control.  Note that for the moment we are working with
+    * segment numbering according to the old xlog seg size.
+    *
+    * The starting segment number is the checkpoint's redo byte offset
+    * (xrecoff) divided by the old segment size.
+    */
+   newXlogId = ControlFile.checkPointCopy.redo.xlogid;
+   newXlogSeg = ControlFile.checkPointCopy.redo.xrecoff / ControlFile.xlog_seg_size;
+
+   /*
+    * Scan the pg_xlog directory to find existing WAL segment files.
+    * We assume any present have been used; in most scenarios this should
+    * be conservative, because of xlog.c's attempts to pre-create files.
+    *
+    * errno is cleared before each readdir() call (once before the loop and
+    * again at the bottom of every iteration) so that, when readdir()
+    * finally returns NULL, the errno test after the loop can tell a read
+    * error apart from a normal end of directory.
+    */
+   xldir = opendir(XLOGDIR);
+   if (xldir == NULL)
+   {
+       fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
+               progname, XLOGDIR, strerror(errno));
+       exit(1);
+   }
+
+   errno = 0;
+   while ((xlde = readdir(xldir)) != NULL)
+   {
+       if (strlen(xlde->d_name) == 24 &&
+           strspn(xlde->d_name, "0123456789ABCDEF") == 24)
+       {
+           unsigned int    tli,
+                           log,
+                           seg;
+
+           sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
+           /*
+            * The strlen/strspn test above has already established that the
+            * name is exactly 24 upper-case hex digits, which sscanf splits
+            * into three 8-digit fields: tli, log and seg.  Only log and seg
+            * take part in the comparison below; tli is parsed but unused.
+            *
+            * Note: we take the max of all files found, regardless of their
+            * timelines.  Another possibility would be to ignore files of
+            * timelines other than the target TLI, but this seems safer.
+            * Better too large a result than too small...
+            */
+           if (log > newXlogId ||
+               (log == newXlogId && seg > newXlogSeg))
+           {
+               newXlogId = log;
+               newXlogSeg = seg;
+           }
+       }
+       errno = 0;
+   }
+#ifdef WIN32
+
+   /*
+    * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
+    * released version
+    *
+    * If the last Windows error is ERROR_NO_MORE_FILES, the scan simply
+    * reached the end of the directory, so clear errno to keep the check
+    * below from reporting a failure.
+    */
+   if (GetLastError() == ERROR_NO_MORE_FILES)
+       errno = 0;
+#endif
+
+   if (errno)
+   {
+       fprintf(stderr, _("%s: could not read from directory \"%s\": %s\n"),
+               progname, XLOGDIR, strerror(errno));
+       exit(1);
+   }
+   closedir(xldir);
+
+   /*
+    * Finally, convert to new xlog seg size, and advance by one to ensure
+    * we are in virgin territory.
+    *
+    * The conversion first scales the segment number by the old segment
+    * size, then divides by XLogSegSize rounding up, so the result is
+    * expressed in segments of the size this program was built with.
+    */
+   newXlogSeg *= ControlFile.xlog_seg_size;
+   newXlogSeg = (newXlogSeg + XLogSegSize - 1) / XLogSegSize;
+
+   /* be sure we wrap around correctly at end of a logfile */
+   NextLogSeg(newXlogId, newXlogSeg);
+}
+
+
 /*
  * Remove existing XLOG files
  */
 
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.33 2006/10/04 00:30:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.34 2006/12/08 19:50:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 
 /* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION 822
+#define PG_CONTROL_VERSION 831
 
 /*
  * Body of CheckPoint XLOG records.  This is declared here because we keep
     */
    DBState     state;          /* see enum above */
    time_t      time;           /* time stamp of last pg_control update */
-   uint32      logId;          /* current log file id */
-   uint32      logSeg;         /* current log file segment, + 1 */
    XLogRecPtr  checkPoint;     /* last check point record ptr */
    XLogRecPtr  prevCheckPoint; /* previous check point record ptr */