pg_buffercache_pages.o
EXTENSION = pg_buffercache
-DATA = pg_buffercache--1.0--1.1.sql pg_buffercache--1.1--1.2.sql pg_buffercache--1.2.sql \
- pg_buffercache--1.2--1.3.sql pg_buffercache--1.3--1.4.sql
+DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \
+ pg_buffercache--1.1--1.2.sql pg_buffercache--1.0--1.1.sql
PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time"
REGRESS = pg_buffercache
'pg_buffercache--1.1--1.2.sql',
'pg_buffercache--1.2--1.3.sql',
'pg_buffercache--1.2.sql',
- 'pg_buffercache--1.3--1.4.sql',
'pg_buffercache.control',
kwargs: contrib_data_args,
)
+++ /dev/null
-/* contrib/pg_buffercache/pg_buffercache--1.3--1.4.sql */
-
--- complain if script is sourced in psql, rather than via ALTER EXTENSION
-\echo Use "ALTER EXTENSION pg_buffercache UPDATE TO '1.4'" to load this file. \quit
-
-/* First we have to remove them from the extension */
-ALTER EXTENSION pg_buffercache DROP VIEW pg_buffercache;
-ALTER EXTENSION pg_buffercache DROP FUNCTION pg_buffercache_pages();
-
-/* Then we can drop them */
-DROP VIEW pg_buffercache;
-DROP FUNCTION pg_buffercache_pages();
-
-/* Now redefine */
-CREATE FUNCTION pg_buffercache_pages()
-RETURNS SETOF RECORD
-AS 'MODULE_PATHNAME', 'pg_buffercache_pages_v1_4'
-LANGUAGE C PARALLEL SAFE;
-
-CREATE VIEW pg_buffercache AS
- SELECT P.* FROM pg_buffercache_pages() AS P
- (bufferid integer, relfilenode int8, reltablespace oid, reldatabase oid,
- relforknumber int2, relblocknumber int8, isdirty bool, usagecount int2,
- pinning_backends int4);
-
--- Don't want these to be available to public.
-REVOKE ALL ON FUNCTION pg_buffercache_pages() FROM PUBLIC;
-REVOKE ALL ON pg_buffercache FROM PUBLIC;
-GRANT EXECUTE ON FUNCTION pg_buffercache_pages() TO pg_monitor;
-GRANT SELECT ON pg_buffercache TO pg_monitor;
# pg_buffercache extension
comment = 'examine the shared buffer cache'
-default_version = '1.4'
+default_version = '1.3'
module_pathname = '$libdir/pg_buffercache'
relocatable = true
* relation node/tablespace/database/blocknum and dirty indicator.
*/
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
-PG_FUNCTION_INFO_V1(pg_buffercache_pages_v1_4);
-static Datum
-pg_buffercache_pages_internal(PG_FUNCTION_ARGS, Oid rfn_typid)
+Datum
+pg_buffercache_pages(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
INT4OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
- rfn_typid, -1, 0);
+ OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
}
else
{
- if (rfn_typid == INT8OID)
- values[1] =
- Int64GetDatum((int64) fctx->record[i].relfilenumber);
- else
- {
- Assert(rfn_typid == OIDOID);
-
- if (fctx->record[i].relfilenumber > OID_MAX)
- ereport(ERROR,
- errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("relfilenode %llu is too large to be represented as an OID",
- (unsigned long long) fctx->record[i].relfilenumber),
- errhint("Upgrade the extension using ALTER EXTENSION pg_buffercache UPDATE"));
-
- values[1] =
- ObjectIdGetDatum((Oid) fctx->record[i].relfilenumber);
- }
-
+ values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
nulls[1] = false;
values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
nulls[2] = false;
else
SRF_RETURN_DONE(funcctx);
}
-
-/* entry point for old extension version */
-Datum
-pg_buffercache_pages(PG_FUNCTION_ARGS)
-{
- return pg_buffercache_pages_internal(fcinfo, OIDOID);
-}
-
-Datum
-pg_buffercache_pages_v1_4(PG_FUNCTION_ARGS)
-{
- return pg_buffercache_pages_internal(fcinfo, INT8OID);
-}
{
unsigned forknum;
- if (fscanf(file, "%u,%u," UINT64_FORMAT ",%u,%u\n", &blkinfo[i].database,
+ if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
&blkinfo[i].tablespace, &blkinfo[i].filenumber,
&forknum, &blkinfo[i].blocknum) != 5)
ereport(ERROR,
{
CHECK_FOR_INTERRUPTS();
- ret = fprintf(file, "%u,%u," UINT64_FORMAT ",%u,%u\n",
+ ret = fprintf(file, "%u,%u,%u,%u,%u\n",
block_info_array[i].database,
block_info_array[i].tablespace,
block_info_array[i].filenumber,
-- ===================================================================
-- Test for filtering out WAL records of a particular table
-- ===================================================================
-SELECT relfilenode AS sample_tbl_relfilenode FROM pg_class WHERE relname = 'sample_tbl' \gset
+SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
- WHERE block_ref LIKE concat('%', :'sample_tbl_relfilenode', '%') AND resource_manager = 'Heap';
+ WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap';
ok
----
t
-- Test for filtering out WAL records of a particular table
-- ===================================================================
-SELECT relfilenode AS sample_tbl_relfilenode FROM pg_class WHERE relname = 'sample_tbl' \gset
+SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
- WHERE block_ref LIKE concat('%', :'sample_tbl_relfilenode', '%') AND resource_manager = 'Heap';
+ WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap';
-- ===================================================================
-- Test for filtering out WAL records based on resource_manager and
<row>
<entry role="catalog_table_entry"><para role="column_definition">
- <structfield>relfilenode</structfield> <type>int8</type>
+ <structfield>relfilenode</structfield> <type>oid</type>
</para>
<para>
Name of the on-disk file of this relation; zero means this
<entry><type>timestamp with time zone</type></entry>
</row>
- <row>
- <entry><structfield>next_relfilenumber</structfield></entry>
- <entry><type>timestamp with time zone</type></entry>
- </row>
-
</tbody>
</tgroup>
</table>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
- <structfield>relfilenode</structfield> <type>int8</type>
+ <structfield>relfilenode</structfield> <type>oid</type>
(references <link linkend="catalog-pg-class"><structname>pg_class</structname></link>.<structfield>relfilenode</structfield>)
</para>
<para>
<caution>
<para>
-Note that a table's filenode will normally be different than the OID. For
-system tables, the initial filenode will be equal to the table OID, but it will
-be different if the table has ever been subjected to a rewriting operation,
-such as <command>TRUNCATE</command>, <command>REINDEX</command>,
-<command>CLUSTER</command> or some forms of <command>ALTER TABLE</command>.
-For user tables, even the initial filenode will be different than the table OID.
+Note that while a table's filenode often matches its OID, this is
+<emphasis>not</emphasis> necessarily the case; some operations, like
+<command>TRUNCATE</command>, <command>REINDEX</command>, <command>CLUSTER</command> and some forms
+of <command>ALTER TABLE</command>, can change the filenode while preserving the OID.
+Avoid assuming that filenode and table OID are the same.
Also, for certain system catalogs including <structname>pg_class</structname> itself,
<structname>pg_class</structname>.<structfield>relfilenode</structfield> contains zero. The
actual filenode number of these catalogs is stored in a lower-level data
BlockNumber blknum;
BufferGetTag(buffer, &locator, &forknum, &blknum);
- elog(ERROR, "failed to add item to index page in %u/%u/" UINT64_FORMAT,
+ elog(ERROR, "failed to add item to index page in %u/%u/%u",
locator.spcOid, locator.dbOid, locator.relNumber);
}
}
static void
out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec)
{
- appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; blk %u; latestRemovedXid %u:%u",
+ appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u",
xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber, xlrec->block,
EpochFromFullTransactionId(xlrec->latestRemovedFullXid),
{
xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec;
- appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; tid %u/%u",
+ appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
xlrec->target_locator.spcOid,
xlrec->target_locator.dbOid,
xlrec->target_locator.relNumber,
{
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec;
- appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; latestRemovedXid %u:%u",
+ appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u:%u",
xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber,
EpochFromFullTransactionId(xlrec->latestRemovedFullXid),
xl_seq_rec *xlrec = (xl_seq_rec *) rec;
if (info == XLOG_SEQ_LOG)
- appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT,
+ appendStringInfo(buf, "rel %u/%u/%u",
xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber);
}
CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "redo %X/%X; "
- "tli %u; prev tli %u; fpw %s; xid %u:%u; relfilenumber " UINT64_FORMAT "; oid %u; "
- "multi %u; offset %u; oldest xid %u in DB %u; oldest multi %u in DB %u; "
+ "tli %u; prev tli %u; fpw %s; xid %u:%u; oid %u; multi %u; offset %u; "
+ "oldest xid %u in DB %u; oldest multi %u in DB %u; "
"oldest/newest commit timestamp xid: %u/%u; "
"oldest running xid %u; %s",
LSN_FORMAT_ARGS(checkpoint->redo),
checkpoint->fullPageWrites ? "true" : "false",
EpochFromFullTransactionId(checkpoint->nextXid),
XidFromFullTransactionId(checkpoint->nextXid),
- checkpoint->nextRelFileNumber,
checkpoint->nextOid,
checkpoint->nextMulti,
checkpoint->nextMultiOffset,
memcpy(&nextOid, rec, sizeof(Oid));
appendStringInfo(buf, "%u", nextOid);
}
- else if (info == XLOG_NEXT_RELFILENUMBER)
- {
- RelFileNumber nextRelFileNumber;
-
- memcpy(&nextRelFileNumber, rec, sizeof(RelFileNumber));
- appendStringInfo(buf, UINT64_FORMAT, nextRelFileNumber);
- }
else if (info == XLOG_RESTORE_POINT)
{
xl_restore_point *xlrec = (xl_restore_point *) rec;
case XLOG_NEXTOID:
id = "NEXTOID";
break;
- case XLOG_NEXT_RELFILENUMBER:
- id = "NEXT_RELFILENUMBER";
- break;
case XLOG_SWITCH:
id = "SWITCH";
break;
appendStringInfoChar(buf, ' ');
appendStringInfo(buf,
- "blkref #%d: rel %u/%u/" UINT64_FORMAT " fork %s blk %u",
+ "blkref #%d: rel %u/%u/%u fork %s blk %u",
block_id,
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
forkNames[forknum],
if (forknum != MAIN_FORKNUM)
{
appendStringInfo(buf,
- ", blkref #%d: rel %u/%u/" UINT64_FORMAT " fork %s blk %u",
+ ", blkref #%d: rel %u/%u/%u fork %s blk %u",
block_id,
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
forkNames[forknum],
else
{
appendStringInfo(buf,
- ", blkref #%d: rel %u/%u/" UINT64_FORMAT " blk %u",
+ ", blkref #%d: rel %u/%u/%u blk %u",
block_id,
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
blk);
entry in pg_class, but that currently isn't done because of the possibility
of deleting data that is useful for forensic analysis of the crash.
Orphan files are harmless --- at worst they waste a bit of disk space ---
-because the relfilenumber counter is monotonically increasing. The maximum
-value is 2^56-1, and there is no provision for wraparound. Thus, on-disk
-collisions aren't possible.
+because we check for on-disk collisions when allocating new relfilenumber
+OIDs. So cleaning up isn't really necessary.
3. Deleting a table, which requires an unlink() that could fail.
#include "postgres.h"
-#include <unistd.h>
-
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/subtrans.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlogutils.h"
-#include "catalog/pg_class.h"
-#include "catalog/pg_tablespace.h"
#include "commands/dbcommands.h"
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
/* Number of OIDs to prefetch (preallocate) per XLOG write */
#define VAR_OID_PREFETCH 8192
-/* Number of RelFileNumbers to be logged per XLOG write */
-#define VAR_RELNUMBER_PER_XLOG 512
-
-/*
- * Need to log more if remaining logged RelFileNumbers are less than the
- * threshold. Valid range could be between 0 to VAR_RELNUMBER_PER_XLOG - 1.
- */
-#define VAR_RELNUMBER_NEW_XLOG_THRESHOLD 256
-
/* pointer to "variable cache" in shared memory (set up by shmem.c) */
VariableCache ShmemVariableCache = NULL;
* wide, counter wraparound will occur eventually, and therefore it is unwise
* to assume they are unique unless precautions are taken to make them so.
* Hence, this routine should generally not be used directly. The only direct
- * caller should be GetNewOidWithIndex() in catalog/catalog.c.
+ * callers should be GetNewOidWithIndex() and GetNewRelFileNumber() in
+ * catalog/catalog.c.
*/
Oid
GetNewObjectId(void)
LWLockRelease(OidGenLock);
}
-/*
- * GetNewRelFileNumber
- *
- * Similar to GetNewObjectId but instead of new Oid it generates new
- * relfilenumber.
- */
-RelFileNumber
-GetNewRelFileNumber(Oid reltablespace, char relpersistence)
-{
- RelFileNumber result;
- RelFileNumber nextRelFileNumber,
- loggedRelFileNumber,
- flushedRelFileNumber;
-
- StaticAssertStmt(VAR_RELNUMBER_NEW_XLOG_THRESHOLD < VAR_RELNUMBER_PER_XLOG,
- "VAR_RELNUMBER_NEW_XLOG_THRESHOLD must be smaller than VAR_RELNUMBER_PER_XLOG");
-
- /* safety check, we should never get this far in a HS standby */
- if (RecoveryInProgress())
- elog(ERROR, "cannot assign RelFileNumber during recovery");
-
- if (IsBinaryUpgrade)
- elog(ERROR, "cannot assign RelFileNumber during binary upgrade");
-
- LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
-
- nextRelFileNumber = ShmemVariableCache->nextRelFileNumber;
- loggedRelFileNumber = ShmemVariableCache->loggedRelFileNumber;
- flushedRelFileNumber = ShmemVariableCache->flushedRelFileNumber;
-
- Assert(nextRelFileNumber <= flushedRelFileNumber);
- Assert(flushedRelFileNumber <= loggedRelFileNumber);
-
- /* check for the wraparound for the relfilenumber counter */
- if (unlikely(nextRelFileNumber > MAX_RELFILENUMBER))
- elog(ERROR, "relfilenumber is too large");
-
- /*
- * If the remaining logged relfilenumbers values are less than the
- * threshold value then log more. Ideally, we can wait until all
- * relfilenumbers have been consumed before logging more. Nevertheless, if
- * we do that, we must immediately flush the logged wal record because we
- * want to ensure that the nextRelFileNumber is always larger than any
- * relfilenumber already in use on disk. And, to maintain that invariant,
- * we must make sure that the record we log reaches the disk before any new
- * files are created with the newly logged range.
- *
- * So in order to avoid flushing the wal immediately, we always log before
- * consuming all the relfilenumber, and now we only have to flush the newly
- * logged relfilenumber wal before consuming the relfilenumber from this
- * new range. By the time we need to flush this wal, hopefully, those have
- * already been flushed with some other XLogFlush operation.
- */
- if (loggedRelFileNumber - nextRelFileNumber <=
- VAR_RELNUMBER_NEW_XLOG_THRESHOLD)
- {
- XLogRecPtr recptr;
-
- loggedRelFileNumber = loggedRelFileNumber + VAR_RELNUMBER_PER_XLOG;
- recptr = LogNextRelFileNumber(loggedRelFileNumber);
- ShmemVariableCache->loggedRelFileNumber = loggedRelFileNumber;
-
- /* remember for the future flush */
- ShmemVariableCache->loggedRelFileNumberRecPtr = recptr;
- }
-
- /*
- * If the nextRelFileNumber is already reached to the already flushed
- * relfilenumber then flush the WAL for previously logged relfilenumber.
- */
- if (nextRelFileNumber >= flushedRelFileNumber)
- {
- XLogFlush(ShmemVariableCache->loggedRelFileNumberRecPtr);
- ShmemVariableCache->flushedRelFileNumber = loggedRelFileNumber;
- }
-
- result = ShmemVariableCache->nextRelFileNumber;
-
- /* we should never be using any relfilenumber outside the flushed range */
- Assert(result <= ShmemVariableCache->flushedRelFileNumber);
-
- (ShmemVariableCache->nextRelFileNumber)++;
-
- LWLockRelease(RelFileNumberGenLock);
-
- /*
- * Because the RelFileNumber counter only ever increases and never wraps
- * around, it should be impossible for the newly-allocated RelFileNumber to
- * already be in use. But, if Asserts are enabled, double check that
- * there's no main-fork relation file with the new RelFileNumber already on
- * disk.
- */
-#ifdef USE_ASSERT_CHECKING
- {
- RelFileLocatorBackend rlocator;
- char *rpath;
- BackendId backend;
-
- switch (relpersistence)
- {
- case RELPERSISTENCE_TEMP:
- backend = BackendIdForTempRelations();
- break;
- case RELPERSISTENCE_UNLOGGED:
- case RELPERSISTENCE_PERMANENT:
- backend = InvalidBackendId;
- break;
- default:
- elog(ERROR, "invalid relpersistence: %c", relpersistence);
- }
-
- /* this logic should match RelationInitPhysicalAddr */
- rlocator.locator.spcOid =
- reltablespace ? reltablespace : MyDatabaseTableSpace;
- rlocator.locator.dbOid = (reltablespace == GLOBALTABLESPACE_OID) ?
- InvalidOid : MyDatabaseId;
- rlocator.locator.relNumber = result;
-
- /*
- * The relpath will vary based on the backend ID, so we must
- * initialize that properly here to make sure that any collisions
- * based on filename are properly detected.
- */
- rlocator.backend = backend;
-
- /* check for existing file of same name. */
- rpath = relpath(rlocator, MAIN_FORKNUM);
- Assert(access(rpath, F_OK) != 0);
- }
-#endif
-
- return result;
-}
-
-/*
- * SetNextRelFileNumber
- *
- * This may only be called during pg_upgrade; it advances the RelFileNumber
- * counter to the specified value if the current value is smaller than the
- * input value.
- */
-void
-SetNextRelFileNumber(RelFileNumber relnumber)
-{
- /* safety check, we should never get this far in a HS standby */
- if (RecoveryInProgress())
- elog(ERROR, "cannot set RelFileNumber during recovery");
-
- if (!IsBinaryUpgrade)
- elog(ERROR, "RelFileNumber can be set only during binary upgrade");
-
- LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
-
- /*
- * If previous assigned value of the nextRelFileNumber is already higher
- * than the current value then nothing to be done. This is possible
- * because during upgrade the objects are not created in relfilenumber
- * order.
- */
- if (relnumber <= ShmemVariableCache->nextRelFileNumber)
- {
- LWLockRelease(RelFileNumberGenLock);
- return;
- }
-
- /*
- * If the new relfilenumber to be set is greater than or equal to already
- * flushed relfilenumber then log more and flush immediately.
- *
- * (This is less efficient than GetNewRelFileNumber, which arranges to
- * log some new relfilenumbers before the old batch is exhausted in the
- * hope that a flush will happen in the background before any values are
- * needed from the new batch. However, since this is only used during
- * binary upgrade, it shouldn't really matter.)
- */
- if (relnumber >= ShmemVariableCache->flushedRelFileNumber)
- {
- RelFileNumber newlogrelnum;
-
- newlogrelnum = relnumber + VAR_RELNUMBER_PER_XLOG;
- XLogFlush(LogNextRelFileNumber(newlogrelnum));
-
- /* we have flushed whatever we have logged so no pending flush */
- ShmemVariableCache->loggedRelFileNumber = newlogrelnum;
- ShmemVariableCache->flushedRelFileNumber = newlogrelnum;
- ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr;
- }
-
- ShmemVariableCache->nextRelFileNumber = relnumber;
-
- LWLockRelease(RelFileNumberGenLock);
-}
-
/*
* StopGeneratingPinnedObjectIds
*
checkPoint.nextXid =
FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
checkPoint.nextOid = FirstGenbkiObjectId;
- checkPoint.nextRelFileNumber = FirstNormalRelFileNumber;
checkPoint.nextMulti = FirstMultiXactId;
checkPoint.nextMultiOffset = 0;
checkPoint.oldestXid = FirstNormalTransactionId;
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
- ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->oidCount = 0;
- ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
- ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
- ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
/* initialize shared memory variables from the checkpoint record */
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
- ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->oidCount = 0;
- ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
- ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
checkPoint.nextOid += ShmemVariableCache->oidCount;
LWLockRelease(OidGenLock);
- /*
- * If this is a shutdown checkpoint then we can safely start allocating
- * relfilenumber from the nextRelFileNumber value after the restart because
- * no one one else can use the relfilenumber beyond that number before the
- * shutdown. OTOH, if it is a normal checkpoint then if there is a crash
- * after this point then we might end up reusing the same relfilenumbers
- * after the restart so we need to set the nextRelFileNumber to the already
- * logged relfilenumber as no one will use number beyond this limit without
- * logging again.
- */
- LWLockAcquire(RelFileNumberGenLock, LW_SHARED);
- if (shutdown)
- checkPoint.nextRelFileNumber = ShmemVariableCache->nextRelFileNumber;
- else
- checkPoint.nextRelFileNumber = ShmemVariableCache->loggedRelFileNumber;
-
- LWLockRelease(RelFileNumberGenLock);
-
MultiXactGetCheckptMulti(shutdown,
&checkPoint.nextMulti,
&checkPoint.nextMultiOffset,
*/
}
-/*
- * Similar to the XLogPutNextOid but instead of writing NEXTOID log record it
- * writes a NEXT_RELFILENUMBER log record. It also returns the XLogRecPtr of
- * the currently logged relfilenumber record, so that the caller can flush it
- * at the appropriate time.
- */
-XLogRecPtr
-LogNextRelFileNumber(RelFileNumber nextrelnumber)
-{
- XLogRecPtr recptr;
-
- XLogBeginInsert();
- XLogRegisterData((char *) (&nextrelnumber), sizeof(RelFileNumber));
- recptr = XLogInsert(RM_XLOG_ID, XLOG_NEXT_RELFILENUMBER);
-
- return recptr;
-}
-
/*
* Write an XLOG SWITCH record.
*
ShmemVariableCache->oidCount = 0;
LWLockRelease(OidGenLock);
}
- if (info == XLOG_NEXT_RELFILENUMBER)
- {
- RelFileNumber nextRelFileNumber;
-
- memcpy(&nextRelFileNumber, XLogRecGetData(record), sizeof(RelFileNumber));
- LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
- ShmemVariableCache->nextRelFileNumber = nextRelFileNumber;
- ShmemVariableCache->loggedRelFileNumber = nextRelFileNumber;
- ShmemVariableCache->flushedRelFileNumber = nextRelFileNumber;
- LWLockRelease(RelFileNumberGenLock);
- }
else if (info == XLOG_CHECKPOINT_SHUTDOWN)
{
CheckPoint checkPoint;
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
LWLockRelease(OidGenLock);
- LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
- ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
- ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
- ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
- LWLockRelease(RelFileNumberGenLock);
MultiXactSetNextMXact(checkPoint.nextMulti,
checkPoint.nextMultiOffset);
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " until %X/%X is replayed, which creates the relation",
+ "suppressing prefetch in relation %u/%u/%u until %X/%X is replayed, which creates the relation",
xlrec->rlocator.spcOid,
xlrec->rlocator.dbOid,
xlrec->rlocator.relNumber,
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " from block %u until %X/%X is replayed, which truncates the relation",
+ "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, which truncates the relation",
xlrec->rlocator.spcOid,
xlrec->rlocator.dbOid,
xlrec->rlocator.relNumber,
{
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing all prefetch in relation %u/%u/" UINT64_FORMAT " until %X/%X is replayed, because the relation does not exist on disk",
+ "suppressing all prefetch in relation %u/%u/%u until %X/%X is replayed, because the relation does not exist on disk",
reln->smgr_rlocator.locator.spcOid,
reln->smgr_rlocator.locator.dbOid,
reln->smgr_rlocator.locator.relNumber,
{
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " from block %u until %X/%X is replayed, because the relation is too small",
+ "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, because the relation is too small",
reln->smgr_rlocator.locator.spcOid,
reln->smgr_rlocator.locator.dbOid,
reln->smgr_rlocator.locator.relNumber,
* truncated beneath our feet?
*/
elog(ERROR,
- "could not prefetch relation %u/%u/" UINT64_FORMAT " block %u",
+ "could not prefetch relation %u/%u/%u block %u",
reln->smgr_rlocator.locator.spcOid,
reln->smgr_rlocator.locator.dbOid,
reln->smgr_rlocator.locator.relNumber,
{
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "prefetch of %u/%u/" UINT64_FORMAT " block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)",
+ "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)",
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
LSN_FORMAT_ARGS(filter->filter_until_replayed),
filter->filter_from_block);
{
#ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL,
- "prefetch of %u/%u/" UINT64_FORMAT " block %u suppressed; filtering until LSN %X/%X is replayed (whole database)",
+ "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (whole database)",
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
LSN_FORMAT_ARGS(filter->filter_until_replayed));
#endif
continue;
if (forknum != MAIN_FORKNUM)
- appendStringInfo(buf, "; blkref #%d: rel %u/%u/" UINT64_FORMAT ", fork %u, blk %u",
+ appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u",
block_id,
rlocator.spcOid, rlocator.dbOid,
rlocator.relNumber,
forknum,
blk);
else
- appendStringInfo(buf, "; blkref #%d: rel %u/%u/" UINT64_FORMAT ", blk %u",
+ appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u",
block_id,
rlocator.spcOid, rlocator.dbOid,
rlocator.relNumber,
if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
{
elog(FATAL,
- "inconsistent page found, rel %u/%u/" UINT64_FORMAT ", forknum %u, blkno %u",
+ "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
forknum, blkno);
}
rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
/* We don't know the name of the relation; use relfilenumber instead */
- sprintf(RelationGetRelationName(rel), UINT64_FORMAT, rlocator.relNumber);
+ sprintf(RelationGetRelationName(rel), "%u", rlocator.relNumber);
/*
* We set up the lockRelId in case anything tries to lock the dummy
- * relation. Note that this is fairly bogus since relNumber are completely
+ * relation. Note that this is fairly bogus since relNumber may be
* different from the relation's OID. It shouldn't really matter though.
* In recovery, we are running by ourselves and can't have any lock
* conflicts. While syncing, we already hold AccessExclusiveLock.
*/
rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid;
- rel->rd_lockInfo.lockRelId.relId = (Oid) rlocator.relNumber;
+ rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber;
rel->rd_smgr = NULL;
if (relForkNum != INIT_FORKNUM)
{
char initForkFile[MAXPGPATH];
- char relNumber[RELNUMBERCHARS + 1];
+ char relNumber[OIDCHARS + 1];
/*
* If any other type of fork, check if there is an init fork
return newOid;
}
+/*
+ * GetNewRelFileNumber
+ * Generate a new relfilenumber that is unique within the
+ * database of the given tablespace.
+ *
+ * If the relfilenumber will also be used as the relation's OID, pass the
+ * opened pg_class catalog, and this routine will guarantee that the result
+ * is also an unused OID within pg_class. If the result is to be used only
+ * as a relfilenumber for an existing relation, pass NULL for pg_class.
+ *
+ * reltablespace selects the tablespace used to build the file path that is
+ * probed; a zero value is replaced by MyDatabaseTableSpace. relpersistence
+ * must be RELPERSISTENCE_TEMP, RELPERSISTENCE_UNLOGGED or
+ * RELPERSISTENCE_PERMANENT; any other value is reported with elog(ERROR).
+ * It decides which backend ID goes into the probed path: temp relations
+ * use BackendIdForTempRelations(), the other two use InvalidBackendId.
+ *
+ * The return value is the first candidate number for which access(F_OK)
+ * on the main-fork path did not succeed. Candidates are drawn from
+ * GetNewOidWithIndex() on pg_class's OID index when pg_class is given,
+ * otherwise from GetNewObjectId().
+ *
+ * As with GetNewOidWithIndex(), there is some theoretical risk of a race
+ * condition, but it doesn't seem worth worrying about.
+ *
+ * Note: we don't support using this in bootstrap mode. All relations
+ * created by bootstrap have preassigned OIDs, so there's no need.
+ */
+RelFileNumber
+GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence)
+{
+ RelFileLocatorBackend rlocator;
+ char *rpath;
+ bool collides;
+ BackendId backend;
+
+ /*
+ * If we ever get here during pg_upgrade, there's something wrong; all
+ * relfilenumber assignments during a binary-upgrade run should be
+ * determined by commands in the dump script. (This is enforced only
+ * through an Assert.)
+ */
+ Assert(!IsBinaryUpgrade);
+
+ switch (relpersistence)
+ {
+ case RELPERSISTENCE_TEMP:
+ backend = BackendIdForTempRelations();
+ break;
+ case RELPERSISTENCE_UNLOGGED:
+ case RELPERSISTENCE_PERMANENT:
+ backend = InvalidBackendId;
+ break;
+ default:
+ elog(ERROR, "invalid relpersistence: %c", relpersistence);
+ return InvalidRelFileNumber; /* placate compiler */
+ }
+
+ /*
+ * This logic should match RelationInitPhysicalAddr: a zero reltablespace
+ * means MyDatabaseTableSpace, and a locator in GLOBALTABLESPACE_OID
+ * carries InvalidOid as its database OID instead of MyDatabaseId.
+ */
+ rlocator.locator.spcOid = reltablespace ? reltablespace : MyDatabaseTableSpace;
+ rlocator.locator.dbOid =
+ (rlocator.locator.spcOid == GLOBALTABLESPACE_OID) ?
+ InvalidOid : MyDatabaseId;
+
+ /*
+ * The relpath will vary based on the backend ID, so we must initialize
+ * that properly here to make sure that any collisions based on filename
+ * are properly detected.
+ */
+ rlocator.backend = backend;
+
+ do
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Generate the OID. With pg_class supplied the candidate comes from
+ * GetNewOidWithIndex() on ClassOidIndexId; with NULL it is taken
+ * straight from GetNewObjectId().
+ */
+ if (pg_class)
+ rlocator.locator.relNumber = GetNewOidWithIndex(pg_class, ClassOidIndexId,
+ Anum_pg_class_oid);
+ else
+ rlocator.locator.relNumber = GetNewObjectId();
+
+ /*
+ * Check for existing file of same name. Only the MAIN_FORKNUM path
+ * is probed; rpath is released with pfree() at the bottom of each
+ * iteration.
+ */
+ rpath = relpath(rlocator, MAIN_FORKNUM);
+
+ if (access(rpath, F_OK) == 0)
+ {
+ /* definite collision: loop again with a fresh candidate */
+ collides = true;
+ }
+ else
+ {
+ /*
+ * Here we have a little bit of a dilemma: if errno is something
+ * other than ENOENT, should we declare a collision and loop? In
+ * practice it seems best to go ahead regardless of the errno. If
+ * there is a colliding file we will get an smgr failure when we
+ * attempt to create the new relation file.
+ */
+ collides = false;
+ }
+
+ pfree(rpath);
+ } while (collides);
+
+ return rlocator.locator.relNumber;
+}
+
/*
* SQL callable interface for GetNewOidWithIndex(). Outside of initdb's
* direct insertions into catalog tables, and recovering from corruption, this
else
{
/*
- * If relfilenumber is unspecified by the caller then allocate a new
- * one, except for system tables, for which we make the initial
- * relfilenumber the same as the table OID. See the comments for
- * FirstNormalRelFileNumber for an explanation of why we do this.
+ * If the caller did not specify a relfilenumber, create the storage
+ * using relid as the relfilenumber.
*/
if (!RelFileNumberIsValid(relfilenumber))
- {
- if (relid < FirstNormalObjectId)
- relfilenumber = relid;
- else
- relfilenumber = GetNewRelFileNumber(reltablespace,
- relpersistence);
- }
+ relfilenumber = relid;
}
/*
values[Anum_pg_class_reloftype - 1] = ObjectIdGetDatum(rd_rel->reloftype);
values[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(rd_rel->relowner);
values[Anum_pg_class_relam - 1] = ObjectIdGetDatum(rd_rel->relam);
- values[Anum_pg_class_relfilenode - 1] = Int64GetDatum(rd_rel->relfilenode);
+ values[Anum_pg_class_relfilenode - 1] = ObjectIdGetDatum(rd_rel->relfilenode);
values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace);
values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages);
values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples);
if (shared_relation && reltablespace != GLOBALTABLESPACE_OID)
elog(ERROR, "shared relations must be placed in pg_global tablespace");
- /* Allocate an OID for the relation, unless we were told what to use. */
+ /*
+ * Allocate an OID for the relation, unless we were told what to use.
+ *
+ * The OID will be the relfilenumber as well, so make sure it doesn't
+ * collide with either pg_class OIDs or existing physical files.
+ */
if (!OidIsValid(relid))
{
/* Use binary-upgrade override for pg_class.oid and relfilenumber */
}
if (!OidIsValid(relid))
- relid = GetNewOidWithIndex(pg_class_desc, ClassOidIndexId,
- Anum_pg_class_oid);
+ relid = GetNewRelFileNumber(reltablespace, pg_class_desc,
+ relpersistence);
}
/*
collationObjectId,
classObjectId);
- /* Allocate an OID for the index, unless we were told what to use. */
+ /*
+ * Allocate an OID for the index, unless we were told what to use.
+ *
+ * The OID will be the relfilenumber as well, so make sure it doesn't
+ * collide with either pg_class OIDs or existing physical files.
+ */
if (!OidIsValid(indexRelationId))
{
/* Use binary-upgrade override for pg_class.oid and relfilenumber */
}
else
{
- indexRelationId = GetNewOidWithIndex(pg_class, ClassOidIndexId,
- Anum_pg_class_oid);
+ indexRelationId =
+ GetNewRelFileNumber(tableSpaceId, pg_class, relpersistence);
}
}
xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
SMgrRelation reln;
- if (xlrec->rlocator.relNumber > ShmemVariableCache->nextRelFileNumber)
- elog(ERROR, "unexpected relnumber " UINT64_FORMAT " is bigger than nextRelFileNumber " UINT64_FORMAT,
- xlrec->rlocator.relNumber, ShmemVariableCache->nextRelFileNumber);
-
reln = smgropen(xlrec->rlocator, InvalidBackendId);
smgrcreate(reln, xlrec->forkNum, true);
}
int nforks = 0;
bool need_fsm_vacuum = false;
- if (xlrec->rlocator.relNumber > ShmemVariableCache->nextRelFileNumber)
- elog(ERROR, "unexpected relnumber " UINT64_FORMAT " is bigger than nextRelFileNumber " UINT64_FORMAT,
- xlrec->rlocator.relNumber, ShmemVariableCache->nextRelFileNumber);
-
reln = smgropen(xlrec->rlocator, InvalidBackendId);
/*
}
/*
- * Generate a new relfilenumber. We cannot reuse the old relfilenumber
- * because of the possibility that that relation will be moved back to the
- * original tablespace before the next checkpoint. At that point, the
- * first segment of the main fork won't have been unlinked yet, and an
- * attempt to create new relation storage with that same relfilenumber
- * will fail.
- */
- newrelfilenumber = GetNewRelFileNumber(newTableSpace,
+ * Relfilenumbers are not unique in databases across tablespaces, so we
+ * need to allocate a new one in the new tablespace.
+ */
+ newrelfilenumber = GetNewRelFileNumber(newTableSpace, NULL,
rel->rd_rel->relpersistence);
/* Open old and new relation */
* parts.
*/
if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 +
- OIDCHARS + 1 + RELNUMBERCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH)
+ OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("tablespace location \"%s\" is too long",
print $off "\tWRITE_UINT_FIELD($f);\n";
print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read;
}
- elsif ($t eq 'uint64' || $t eq 'RelFileNumber')
+ elsif ($t eq 'uint64')
{
print $off "\tWRITE_UINT64_FIELD($f);\n";
print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read;
}
- elsif ($t eq 'Oid')
+ elsif ($t eq 'Oid' || $t eq 'RelFileNumber')
{
print $off "\tWRITE_OID_FIELD($f);\n";
print $rff "\tREAD_OID_FIELD($f);\n" unless $no_read;
break;
case XLOG_NOOP:
case XLOG_NEXTOID:
- case XLOG_NEXT_RELFILENUMBER:
case XLOG_SWITCH:
case XLOG_BACKUP_END:
case XLOG_PARAMETER_CHANGE:
hash_seq_init(&hstat, tuplecid_data);
while ((ent = (ReorderBufferTupleCidEnt *) hash_seq_search(&hstat)) != NULL)
{
- elog(DEBUG3, "mapping: node: %u/%u/" UINT64_FORMAT " tid: %u/%u cmin: %u, cmax: %u",
+ elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u",
ent->key.rlocator.dbOid,
ent->key.rlocator.spcOid,
ent->key.rlocator.relNumber,
typedef struct
{
- RelFileNumber relnumber; /* hash key */
+ Oid reloid; /* hash key */
} unlogged_relation_entry;
/*
* need to be reset. Otherwise, this cleanup operation would be
* O(n^2).
*/
- ctl.keysize = sizeof(RelFileNumber);
+ ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(unlogged_relation_entry);
ctl.hcxt = CurrentMemoryContext;
- hash = hash_create("unlogged relation RelFileNumbers", 32, &ctl,
+ hash = hash_create("unlogged relation OIDs", 32, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
/* Scan the directory. */
continue;
/*
- * Put the RELFILENUMBER portion of the name into the hash table,
- * if it isn't already.
+ * Put the OID portion of the name into the hash table, if it
+ * isn't already.
*/
- ent.relnumber = atorelnumber(de->d_name);
+ ent.reloid = atooid(de->d_name);
(void) hash_search(hash, &ent, HASH_ENTER, NULL);
}
continue;
/*
- * See whether the RELFILENUMBER portion of the name shows up in
- * the hash table. If so, nuke it!
+ * See whether the OID portion of the name shows up in the hash
+ * table. If so, nuke it!
*/
- ent.relnumber = atorelnumber(de->d_name);
+ ent.reloid = atooid(de->d_name);
if (hash_search(hash, &ent, HASH_FIND, NULL))
{
snprintf(rm_path, sizeof(rm_path), "%s/%s",
{
ForkNumber forkNum;
int relnumchars;
- char relnumbuf[RELNUMBERCHARS + 1];
+ char relnumbuf[OIDCHARS + 1];
char srcpath[MAXPGPATH * 2];
char dstpath[MAXPGPATH];
{
ForkNumber forkNum;
int relnumchars;
- char relnumbuf[RELNUMBERCHARS + 1];
+ char relnumbuf[OIDCHARS + 1];
char mainpath[MAXPGPATH];
/* Skip anything that doesn't look like a relation data file. */
* for a non-temporary relation and false otherwise.
*
* NB: If this function returns true, the caller is entitled to assume that
- * *relnumchars has been set to a value no more than RELNUMBERCHARS, and thus
- * that a buffer of RELNUMBERCHARS+1 characters is sufficient to hold the
+ * *relnumchars has been set to a value no more than OIDCHARS, and thus
+ * that a buffer of OIDCHARS+1 characters is sufficient to hold the
* RelFileNumber portion of the filename. This is critical to protect against
* a possible buffer overrun.
*/
/* Look for a non-empty string of digits (that isn't too long). */
for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
;
- if (pos == 0 || pos > RELNUMBERCHARS)
+ if (pos == 0 || pos > OIDCHARS)
return false;
*relnumchars = pos;
BlockNumber blknum;
BufferGetTag(buf, &rlocator, &forknum, &blknum);
- elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/" UINT64_FORMAT,
+ elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber);
/* make sure we hold an exclusive lock */
# 45 was XactTruncationLock until removal of BackendRandomLock
WrapLimitsVacuumLock 46
NotifyQueueTailLock 47
-RelFileNumberGenLock 48
\ No newline at end of file
* next checkpoint, we prevent reassignment of the relfilenumber until it's
* safe, because relfilenumber assignment skips over any existing file.
*
- * XXX. Although all of this was true when relfilenumbers were 32 bits wide,
- * they are now 56 bits wide and do not wrap around, so in the future we can
- * change the code to immediately unlink the first segment of the relation
- * along with all the others. We still do reuse relfilenumbers when createdb()
- * is performed using the file-copy method or during movedb(), but the scenario
- * described above can only happen when creating a new relation.
- *
* We do not need to go through this dance for temp relations, though, because
* we never make WAL entries for temp rels, and so a temp rel poses no threat
* to the health of a regular rel that has taken over its relfilenumber.
/* First time through: initialize the hash table */
HASHCTL ctl;
- ctl.keysize = SizeOfRelFileLocatorBackend;
+ ctl.keysize = sizeof(RelFileLocatorBackend);
ctl.entrysize = sizeof(SMgrRelationData);
SMgrRelationHash = hash_create("smgr relation table", 400,
&ctl, HASH_ELEM | HASH_BLOBS);
if (!RelFileNumberIsValid(result))
PG_RETURN_NULL();
- PG_RETURN_INT64(result);
+ PG_RETURN_OID(result);
}
/*
pg_filenode_relation(PG_FUNCTION_ARGS)
{
Oid reltablespace = PG_GETARG_OID(0);
- RelFileNumber relfilenumber = PG_GETARG_INT64(1);
+ RelFileNumber relfilenumber = PG_GETARG_OID(1);
Oid heaprel;
- /* check whether the relfilenumber is within a valid range */
- CHECK_RELFILENUMBER_RANGE(relfilenumber);
-
/* test needed so RelidByRelfilenumber doesn't misbehave */
if (!RelFileNumberIsValid(relfilenumber))
PG_RETURN_NULL();
#include "catalog/pg_type.h"
#include "commands/extension.h"
#include "miscadmin.h"
-#include "storage/relfilelocator.h"
#include "utils/array.h"
#include "utils/builtins.h"
Datum
binary_upgrade_set_next_heap_relfilenode(PG_FUNCTION_ARGS)
{
- RelFileNumber relfilenumber = PG_GETARG_INT64(0);
+ RelFileNumber relfilenumber = PG_GETARG_OID(0);
CHECK_IS_BINARY_UPGRADE;
- CHECK_RELFILENUMBER_RANGE(relfilenumber);
binary_upgrade_next_heap_pg_class_relfilenumber = relfilenumber;
- SetNextRelFileNumber(relfilenumber + 1);
PG_RETURN_VOID();
}
Datum
binary_upgrade_set_next_index_relfilenode(PG_FUNCTION_ARGS)
{
- RelFileNumber relfilenumber = PG_GETARG_INT64(0);
+ RelFileNumber relfilenumber = PG_GETARG_OID(0);
CHECK_IS_BINARY_UPGRADE;
- CHECK_RELFILENUMBER_RANGE(relfilenumber);
binary_upgrade_next_index_pg_class_relfilenumber = relfilenumber;
- SetNextRelFileNumber(relfilenumber + 1);
PG_RETURN_VOID();
}
Datum
binary_upgrade_set_next_toast_relfilenode(PG_FUNCTION_ARGS)
{
- RelFileNumber relfilenumber = PG_GETARG_INT64(0);
+ RelFileNumber relfilenumber = PG_GETARG_OID(0);
CHECK_IS_BINARY_UPGRADE;
- CHECK_RELFILENUMBER_RANGE(relfilenumber);
binary_upgrade_next_toast_pg_class_relfilenumber = relfilenumber;
- SetNextRelFileNumber(relfilenumber + 1);
PG_RETURN_VOID();
}
*/
RelFileLocatorBackend rlocator;
- rlocator.locator.dbOid = msg->sm.dbOid;
- rlocator.locator.spcOid = msg->sm.spcOid;
- rlocator.locator.relNumber = (((uint64) msg->sm.relNumber_hi) << 32) | msg->sm.relNumber_lo;
+ rlocator.locator = msg->sm.rlocator;
rlocator.backend = (msg->sm.backend_hi << 16) | (int) msg->sm.backend_lo;
smgrcloserellocator(rlocator);
}
msg.sm.id = SHAREDINVALSMGR_ID;
msg.sm.backend_hi = rlocator.backend >> 16;
msg.sm.backend_lo = rlocator.backend & 0xffff;
- msg.sm.dbOid = rlocator.locator.dbOid;
- msg.sm.spcOid = rlocator.locator.spcOid;
- msg.sm.relNumber_hi = rlocator.locator.relNumber >> 32;
- msg.sm.relNumber_lo = rlocator.locator.relNumber & 0xffffffff;
+ msg.sm.rlocator = rlocator.locator;
/* check AddCatcacheInvalidationMessage() for an explanation */
VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
{
/* Allocate a new relfilenumber */
newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace,
- persistence);
+ NULL, persistence);
}
else if (relation->rd_rel->relkind == RELKIND_INDEX)
{
InitializeRelfilenumberMap(void)
{
HASHCTL ctl;
+ int i;
/* Make sure we've initialized CacheMemoryContext. */
if (CacheMemoryContext == NULL)
/* build skey */
MemSet(&relfilenumber_skey, 0, sizeof(relfilenumber_skey));
- fmgr_info_cxt(F_OIDEQ,
- &relfilenumber_skey[0].sk_func,
- CacheMemoryContext);
- relfilenumber_skey[0].sk_strategy = BTEqualStrategyNumber;
- relfilenumber_skey[0].sk_subtype = InvalidOid;
- relfilenumber_skey[0].sk_collation = InvalidOid;
- relfilenumber_skey[0].sk_attno = Anum_pg_class_reltablespace;
+ for (i = 0; i < 2; i++)
+ {
+ fmgr_info_cxt(F_OIDEQ,
+ &relfilenumber_skey[i].sk_func,
+ CacheMemoryContext);
+ relfilenumber_skey[i].sk_strategy = BTEqualStrategyNumber;
+ relfilenumber_skey[i].sk_subtype = InvalidOid;
+ relfilenumber_skey[i].sk_collation = InvalidOid;
+ }
- fmgr_info_cxt(F_INT8EQ,
- &relfilenumber_skey[1].sk_func,
- CacheMemoryContext);
- relfilenumber_skey[1].sk_strategy = BTEqualStrategyNumber;
- relfilenumber_skey[1].sk_subtype = InvalidOid;
- relfilenumber_skey[1].sk_collation = InvalidOid;
+ relfilenumber_skey[0].sk_attno = Anum_pg_class_reltablespace;
relfilenumber_skey[1].sk_attno = Anum_pg_class_relfilenode;
/*
/* set scan arguments */
skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
- skey[1].sk_argument = Int64GetDatum((int64) relfilenumber);
+ skey[1].sk_argument = ObjectIdGetDatum(relfilenumber);
scandesc = systable_beginscan(relation,
ClassTblspcRelfilenodeIndexId,
if (found)
elog(ERROR,
- "unexpected duplicate for tablespace %u, relfilenumber " UINT64_FORMAT,
+ "unexpected duplicate for tablespace %u, relfilenumber %u",
reltablespace, relfilenumber);
found = true;
Datum
pg_control_checkpoint(PG_FUNCTION_ARGS)
{
- Datum values[19];
- bool nulls[19];
+ Datum values[18];
+ bool nulls[18];
TupleDesc tupdesc;
HeapTuple htup;
ControlFileData *ControlFile;
XIDOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time",
TIMESTAMPTZOID, -1, 0);
- TupleDescInitEntry(tupdesc, (AttrNumber) 19, "next_relfilenumber",
- INT8OID, -1, 0);
tupdesc = BlessTupleDesc(tupdesc);
/* Read the control file. */
values[17] = TimestampTzGetDatum(time_t_to_timestamptz(ControlFile->checkPointCopy.time));
nulls[17] = false;
- values[18] = Int64GetDatum((int64) ControlFile->checkPointCopy.nextRelFileNumber);
- nulls[18] = false;
-
htup = heap_form_tuple(tupdesc, values, nulls);
PG_RETURN_DATUM(HeapTupleGetDatum(htup));
mode = PG_MODE_ENABLE;
break;
case 'f':
- if (!option_parse_relfilenumber(optarg, "-f/--filenode"))
+ if (!option_parse_int(optarg, "-f/--filenode", 0,
+ INT_MAX,
+ NULL))
exit(1);
only_filenode = pstrdup(optarg);
break;
printf(_("Latest checkpoint's NextXID: %u:%u\n"),
EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid),
XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid));
- printf(_("Latest checkpoint's NextRelFileNumber:%llu\n"),
- (unsigned long long) ControlFile->checkPointCopy.nextRelFileNumber);
printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile->checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
atooid(PQgetvalue(lo_res, i, ii_oid)));
oid = atooid(PQgetvalue(lo_res, i, ii_oid));
- relfilenumber = atorelnumber(PQgetvalue(lo_res, i, ii_relfilenode));
+ relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode));
if (oid == LargeObjectRelationId)
appendPQExpBuffer(loOutQry,
- "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n",
relfilenumber);
else if (oid == LargeObjectLOidPNIndexId)
appendPQExpBuffer(loOutQry,
- "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
relfilenumber);
}
relkind = *PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "relkind"));
- relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
- PQfnumber(upgrade_res, "relfilenode")));
+ relfilenumber = atooid(PQgetvalue(upgrade_res, 0,
+ PQfnumber(upgrade_res, "relfilenode")));
toast_oid = atooid(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "reltoastrelid")));
- toast_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
- PQfnumber(upgrade_res, "toast_relfilenode")));
+ toast_relfilenumber = atooid(PQgetvalue(upgrade_res, 0,
+ PQfnumber(upgrade_res, "toast_relfilenode")));
toast_index_oid = atooid(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "indexrelid")));
- toast_index_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
- PQfnumber(upgrade_res, "toast_index_relfilenode")));
+ toast_index_relfilenumber = atooid(PQgetvalue(upgrade_res, 0,
+ PQfnumber(upgrade_res, "toast_index_relfilenode")));
appendPQExpBufferStr(upgrade_buffer,
"\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n");
*/
if (RelFileNumberIsValid(relfilenumber) && relkind != RELKIND_PARTITIONED_TABLE)
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n",
relfilenumber);
/*
"SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n",
toast_oid);
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('%u'::pg_catalog.oid);\n",
toast_relfilenumber);
/* every toast table has an index */
"SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n",
toast_index_oid);
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
toast_index_relfilenumber);
}
"SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n",
pg_class_oid);
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
relfilenumber);
}
segNo = 0;
matched = false;
- nmatch = sscanf(path, "global/" UINT64_FORMAT ".%u", &rlocator.relNumber, &segNo);
+ nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
if (nmatch == 1 || nmatch == 2)
{
rlocator.spcOid = GLOBALTABLESPACE_OID;
}
else
{
- nmatch = sscanf(path, "base/%u/" UINT64_FORMAT ".%u",
+ nmatch = sscanf(path, "base/%u/%u.%u",
&rlocator.dbOid, &rlocator.relNumber, &segNo);
if (nmatch == 2 || nmatch == 3)
{
}
else
{
- nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/" UINT64_FORMAT ".%u",
+ nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
&rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
&segNo);
if (nmatch == 3 || nmatch == 4)
relname = PQgetvalue(res, relnum, i_relname);
curr->relname = pg_strdup(relname);
- curr->relfilenumber =
- atorelnumber(PQgetvalue(res, relnum, i_relfilenumber));
+ curr->relfilenumber = atooid(PQgetvalue(res, relnum, i_relfilenumber));
curr->tblsp_alloc = false;
/* Is the tablespace oid non-default? */
* oids are the same between old and new clusters. This is important
* because toast oids are stored as toast pointers in user tables.
*
- * We control assignments of pg_class.relfilenode because we want the
- * filenames to match between the old and new cluster.
+ * While pg_class.oid and pg_class.relfilenode are initially the same in a
+ * cluster, they can diverge due to CLUSTER, REINDEX, or VACUUM FULL. We
+ * control assignments of pg_class.relfilenode because we want the filenames
+ * to match between the old and new cluster.
*
* We control assignment of pg_tablespace.oid because we want the oid to match
* between the old and new cluster.
else
snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno);
- snprintf(old_file, sizeof(old_file), "%s%s/%u/" UINT64_FORMAT "%s%s",
+ snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s",
map->old_tablespace,
map->old_tablespace_suffix,
map->db_oid,
map->relfilenumber,
type_suffix,
extent_suffix);
- snprintf(new_file, sizeof(new_file), "%s%s/%u/" UINT64_FORMAT "%s%s",
+ snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s",
map->new_tablespace,
map->new_tablespace_suffix,
map->db_oid,
}
break;
case 'R':
- if (sscanf(optarg, "%u/%u/" UINT64_FORMAT,
+ if (sscanf(optarg, "%u/%u/%u",
&config.filter_by_relation.spcOid,
&config.filter_by_relation.dbOid,
&config.filter_by_relation.relNumber) != 3 ||
# REINDEX operations. A set of relfilenodes is saved from the catalogs
# and then compared with pg_class.
$node->safe_psql('postgres',
- 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode int8);'
+ 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode oid);'
);
# Save the relfilenode of a set of toast indexes, one from the catalog
# pg_constraint and one from the test table.
Assert(dbOid == 0);
Assert(backendId == InvalidBackendId);
if (forkNumber != MAIN_FORKNUM)
- path = psprintf("global/" UINT64_FORMAT "_%s",
+ path = psprintf("global/%u_%s",
relNumber, forkNames[forkNumber]);
else
- path = psprintf("global/" UINT64_FORMAT, relNumber);
+ path = psprintf("global/%u", relNumber);
}
else if (spcOid == DEFAULTTABLESPACE_OID)
{
if (backendId == InvalidBackendId)
{
if (forkNumber != MAIN_FORKNUM)
- path = psprintf("base/%u/" UINT64_FORMAT "_%s",
+ path = psprintf("base/%u/%u_%s",
dbOid, relNumber,
forkNames[forkNumber]);
else
- path = psprintf("base/%u/" UINT64_FORMAT,
+ path = psprintf("base/%u/%u",
dbOid, relNumber);
}
else
{
if (forkNumber != MAIN_FORKNUM)
- path = psprintf("base/%u/t%d_" UINT64_FORMAT "_%s",
+ path = psprintf("base/%u/t%d_%u_%s",
dbOid, backendId, relNumber,
forkNames[forkNumber]);
else
- path = psprintf("base/%u/t%d_" UINT64_FORMAT,
+ path = psprintf("base/%u/t%d_%u",
dbOid, backendId, relNumber);
}
}
if (backendId == InvalidBackendId)
{
if (forkNumber != MAIN_FORKNUM)
- path = psprintf("pg_tblspc/%u/%s/%u/" UINT64_FORMAT "_%s",
+ path = psprintf("pg_tblspc/%u/%s/%u/%u_%s",
spcOid, TABLESPACE_VERSION_DIRECTORY,
dbOid, relNumber,
forkNames[forkNumber]);
else
- path = psprintf("pg_tblspc/%u/%s/%u/" UINT64_FORMAT,
+ path = psprintf("pg_tblspc/%u/%s/%u/%u",
spcOid, TABLESPACE_VERSION_DIRECTORY,
dbOid, relNumber);
}
else
{
if (forkNumber != MAIN_FORKNUM)
- path = psprintf("pg_tblspc/%u/%s/%u/t%d_" UINT64_FORMAT "_%s",
+ path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u_%s",
spcOid, TABLESPACE_VERSION_DIRECTORY,
dbOid, backendId, relNumber,
forkNames[forkNumber]);
else
- path = psprintf("pg_tblspc/%u/%s/%u/t%d_" UINT64_FORMAT,
+ path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u",
spcOid, TABLESPACE_VERSION_DIRECTORY,
dbOid, backendId, relNumber);
}
#include "postgres_fe.h"
#include "common/logging.h"
-#include "common/relpath.h"
#include "common/string.h"
#include "fe_utils/option_utils.h"
*result = val;
return true;
}
-
-/*
- * option_parse_relfilenumber
- *
- * Parse relfilenumber value for an option. If the parsing is successful,
- * returns true; if parsing fails, returns false.
- */
-bool
-option_parse_relfilenumber(const char *optarg, const char *optname)
-{
- char *endptr;
- uint64 val;
-
- errno = 0;
- val = strtou64(optarg, &endptr, 10);
-
- /*
- * Skip any trailing whitespace; if anything but whitespace remains before
- * the terminating character, fail.
- */
- while (*endptr != '\0' && isspace((unsigned char) *endptr))
- endptr++;
-
- if (*endptr != '\0')
- {
- pg_log_error("invalid value \"%s\" for option %s",
- optarg, optname);
- return false;
- }
-
- if (val > MAX_RELFILENUMBER)
- {
- pg_log_error("%s must be in range " UINT64_FORMAT ".." UINT64_FORMAT,
- optname, UINT64CONST(0), MAX_RELFILENUMBER);
- return false;
- }
-
- return true;
-}
#define TRANSAM_H
#include "access/xlogdefs.h"
-#include "common/relpath.h"
/* ----------------
#define FirstUnpinnedObjectId 12000
#define FirstNormalObjectId 16384
-/* ----------
- * RelFileNumbers are normally assigned sequentially beginning with
- * FirstNormalRelFileNumber, but for system tables the initial RelFileNumber
- * is equal to the table OID. This scheme allows pg_upgrade to work: we expect
- * that the new cluster will contain only system tables, and that none of those
- * will have previously been rewritten, so any RelFileNumber which is in use
- * in both the old and new clusters will be used for the same relation in both
- * places.
- *
- * This is important because pg_upgrade can't reactively move conflicting
- * relations out of the way. If it tries to set the RelFileNumber for a
- * relation to some value that's already in use by a different relation, the
- * upgrade will just fail. It's OK if the same RelFileNumber is used for the
- * same relation, though, since then nothing needs to be changed.
- * ----------
- */
-#define FirstNormalRelFileNumber ((RelFileNumber) 100000)
-
-#define CHECK_RELFILENUMBER_RANGE(relfilenumber) \
-do { \
- if ((relfilenumber) < 0 || (relfilenumber) > MAX_RELFILENUMBER) \
- ereport(ERROR, \
- errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
- errmsg("relfilenumber %llu is out of range", \
- (unsigned long long) (relfilenumber))); \
-} while (0)
-
/*
* VariableCache is a data structure in shared memory that is used to track
* OID and XID assignment state. For largely historical reasons, there is
Oid nextOid; /* next OID to assign */
uint32 oidCount; /* OIDs available before must do XLOG work */
- /*
- * These fields are protected by RelFileNumberGenLock.
- */
- RelFileNumber nextRelFileNumber; /* next relfilenumber to assign */
- RelFileNumber loggedRelFileNumber; /* last logged relfilenumber */
- RelFileNumber flushedRelFileNumber; /* last flushed relfilenumber */
- XLogRecPtr loggedRelFileNumberRecPtr; /* xlog record pointer w.r.t.
- * loggedRelFileNumber */
-
/*
* These fields are protected by XidGenLock.
*/
extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid);
extern bool ForceTransactionIdLimitUpdate(void);
extern Oid GetNewObjectId(void);
-extern RelFileNumber GetNewRelFileNumber(Oid reltablespace,
- char relpersistence);
-extern void SetNextRelFileNumber(RelFileNumber relnumber);
extern void StopGeneratingPinnedObjectIds(void);
#ifdef USE_ASSERT_CHECKING
extern bool CreateRestartPoint(int flags);
extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN);
extern void XLogPutNextOid(Oid nextOid);
-extern XLogRecPtr LogNextRelFileNumber(RelFileNumber nextrelnumber);
extern XLogRecPtr XLogRestorePoint(const char *rpName);
extern void UpdateFullPageWrites(void);
extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p);
extern Oid GetNewOidWithIndex(Relation relation, Oid indexId,
AttrNumber oidcolumn);
+extern RelFileNumber GetNewRelFileNumber(Oid reltablespace,
+ Relation pg_class,
+ char relpersistence);
#endif /* CATALOG_H */
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202209271
+#define CATALOG_VERSION_NO 202209261
#endif
/* oid */
Oid oid;
- /* access method; 0 if not a table / index */
- Oid relam BKI_DEFAULT(heap) BKI_LOOKUP_OPT(pg_am);
-
- /* identifier of physical storage file */
- /* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
- int64 relfilenode BKI_DEFAULT(0);
-
/* class name */
NameData relname;
/* class owner */
Oid relowner BKI_DEFAULT(POSTGRES) BKI_LOOKUP(pg_authid);
+ /* access method; 0 if not a table / index */
+ Oid relam BKI_DEFAULT(heap) BKI_LOOKUP_OPT(pg_am);
+
+ /* identifier of physical storage file */
+ /* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
+ Oid relfilenode BKI_DEFAULT(0);
+
/* identifier of table space for relation (0 means default for database) */
Oid reltablespace BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_tablespace);
DECLARE_UNIQUE_INDEX_PKEY(pg_class_oid_index, 2662, ClassOidIndexId, on pg_class using btree(oid oid_ops));
DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, ClassNameNspIndexId, on pg_class using btree(relname name_ops, relnamespace oid_ops));
-DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode int8_ops));
+DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops));
#ifdef EXPOSE_TO_CLIENT_CODE
* timeline (equals ThisTimeLineID otherwise) */
bool fullPageWrites; /* current full_page_writes */
FullTransactionId nextXid; /* next free transaction ID */
- RelFileNumber nextRelFileNumber; /* next relfilenumber */
Oid nextOid; /* next free OID */
MultiXactId nextMulti; /* next free MultiXactId */
MultiXactOffset nextMultiOffset; /* next free MultiXact offset */
#define XLOG_FPI 0xB0
/* 0xC0 is used in Postgres 9.5-11 */
#define XLOG_OVERWRITE_CONTRECORD 0xD0
-#define XLOG_NEXT_RELFILENUMBER 0xE0
/*
proname => 'pg_indexes_size', provolatile => 'v', prorettype => 'int8',
proargtypes => 'regclass', prosrc => 'pg_indexes_size' },
{ oid => '2999', descr => 'filenode identifier of relation',
- proname => 'pg_relation_filenode', provolatile => 's', prorettype => 'int8',
+ proname => 'pg_relation_filenode', provolatile => 's', prorettype => 'oid',
proargtypes => 'regclass', prosrc => 'pg_relation_filenode' },
{ oid => '3454', descr => 'relation OID for filenode and tablespace',
proname => 'pg_filenode_relation', provolatile => 's',
- prorettype => 'regclass', proargtypes => 'oid int8',
+ prorettype => 'regclass', proargtypes => 'oid oid',
prosrc => 'pg_filenode_relation' },
{ oid => '3034', descr => 'file path of relation',
proname => 'pg_relation_filepath', provolatile => 's', prorettype => 'text',
prosrc => 'binary_upgrade_set_missing_value' },
{ oid => '4545', descr => 'for use by pg_upgrade',
proname => 'binary_upgrade_set_next_heap_relfilenode', provolatile => 'v',
- proparallel => 'u', prorettype => 'void', proargtypes => 'int8',
+ proparallel => 'u', prorettype => 'void', proargtypes => 'oid',
prosrc => 'binary_upgrade_set_next_heap_relfilenode' },
{ oid => '4546', descr => 'for use by pg_upgrade',
proname => 'binary_upgrade_set_next_index_relfilenode', provolatile => 'v',
- proparallel => 'u', prorettype => 'void', proargtypes => 'int8',
+ proparallel => 'u', prorettype => 'void', proargtypes => 'oid',
prosrc => 'binary_upgrade_set_next_index_relfilenode' },
{ oid => '4547', descr => 'for use by pg_upgrade',
proname => 'binary_upgrade_set_next_toast_relfilenode', provolatile => 'v',
- proparallel => 'u', prorettype => 'void', proargtypes => 'int8',
+ proparallel => 'u', prorettype => 'void', proargtypes => 'oid',
prosrc => 'binary_upgrade_set_next_toast_relfilenode' },
{ oid => '4548', descr => 'for use by pg_upgrade',
proname => 'binary_upgrade_set_next_pg_tablespace_oid', provolatile => 'v',
/*
* RelFileNumber data type identifies the specific relation file name.
*/
-typedef uint64 RelFileNumber;
-#define InvalidRelFileNumber ((RelFileNumber) 0)
+typedef Oid RelFileNumber;
+#define InvalidRelFileNumber ((RelFileNumber) InvalidOid)
#define RelFileNumberIsValid(relnumber) \
((bool) ((relnumber) != InvalidRelFileNumber))
-#define atorelnumber(x) ((RelFileNumber) strtou64((x), NULL, 10))
-#define MAX_RELFILENUMBER UINT64CONST(0x00FFFFFFFFFFFFFF)
/*
* Name of major-version-specific tablespace subdirectories
/* Characters to allow for an OID in a relation path */
#define OIDCHARS 10 /* max chars printed by %u */
-#define RELNUMBERCHARS 20 /* max chars printed by UINT64_FORMAT */
/*
* Stuff for fork names.
extern bool option_parse_int(const char *optarg, const char *optname,
int min_range, int max_range,
int *result);
-extern bool option_parse_relfilenumber(const char *optarg,
- const char *optname);
#endif /* OPTION_UTILS_H */
{
Oid spcOid; /* tablespace oid */
Oid dbOid; /* database oid */
-
- /*
- * relForkDetails[] stores the fork number in the high 8 bits of the first
- * integer; the remaining 56 bits are used to store the relfilenmber.
- * Expanding the relfilenumber to a full 64 bits would require widening
- * the BufferTag, which is undesirable for performance reasons. We use
- * two 32-bit values here rather than a single 64-bit value to avoid
- * padding the struct out to a multiple of 8 bytes.
- */
- uint32 relForkDetails[2];
+ RelFileNumber relNumber; /* relation file number */
+ ForkNumber forkNum; /* fork number */
BlockNumber blockNum; /* blknum relative to begin of reln */
} BufferTag;
-/* High relNumber bits in relForkDetails[0] */
-#define BUFTAG_RELNUM_HIGH_BITS 24
-
-/* Low relNumber bits in relForkDetails[1] */
-#define BUFTAG_RELNUM_LOW_BITS 32
-
-/* Mask to fetch high bits of relNumber from relForkDetails[0] */
-#define BUFTAG_RELNUM_HIGH_MASK ((1U << BUFTAG_RELNUM_HIGH_BITS) - 1)
-
-/* Mask to fetch low bits of relNumber from relForkDetails[1] */
-#define BUFTAG_RELNUM_LOW_MASK 0XFFFFFFFF
-
static inline RelFileNumber
BufTagGetRelNumber(const BufferTag *tag)
{
- uint64 relnum;
-
- relnum = ((uint64) tag->relForkDetails[0]) & BUFTAG_RELNUM_HIGH_MASK;
- relnum = (relnum << BUFTAG_RELNUM_LOW_BITS) | tag->relForkDetails[1];
-
- Assert(relnum <= MAX_RELFILENUMBER);
- return (RelFileNumber) relnum;
+ return tag->relNumber;
}
static inline ForkNumber
BufTagGetForkNum(const BufferTag *tag)
{
- ForkNumber ret;
-
- StaticAssertStmt(MAX_FORKNUM <= INT8_MAX,
- "MAX_FORKNUM can't be greater than INT8_MAX");
-
- ret = (ForkNumber) (tag->relForkDetails[0] >> BUFTAG_RELNUM_HIGH_BITS);
- return ret;
+ return tag->forkNum;
}
static inline void
BufTagSetRelForkDetails(BufferTag *tag, RelFileNumber relnumber,
ForkNumber forknum)
{
- Assert(relnumber <= MAX_RELFILENUMBER);
- Assert(forknum <= MAX_FORKNUM);
-
- tag->relForkDetails[0] = (relnumber >> BUFTAG_RELNUM_LOW_BITS) &
- BUFTAG_RELNUM_HIGH_MASK;
- tag->relForkDetails[0] |= (forknum << BUFTAG_RELNUM_HIGH_BITS);
- tag->relForkDetails[1] = relnumber & BUFTAG_RELNUM_LOW_MASK;
+ tag->relNumber = relnumber;
+ tag->forkNum = forknum;
}
static inline RelFileLocator
{
return (tag1->spcOid == tag2->spcOid) &&
(tag1->dbOid == tag2->dbOid) &&
- (tag1->relForkDetails[0] == tag2->relForkDetails[0]) &&
- (tag1->relForkDetails[1] == tag2->relForkDetails[1]) &&
- (tag1->blockNum == tag2->blockNum);
+ (tag1->relNumber == tag2->relNumber) &&
+ (tag1->blockNum == tag2->blockNum) &&
+ (tag1->forkNum == tag2->forkNum);
}
static inline bool
* Nonzero dbOid values correspond to pg_database.oid.
*
* relNumber identifies the specific relation. relNumber corresponds to
- * pg_class.relfilenode. Notice that relNumber values are assigned by
- * GetNewRelFileNumber(), which will only ever assign the same value once
- * during the lifetime of a cluster. However, since CREATE DATABASE duplicates
- * the relfilenumbers of the template database, the values are in practice only
- * unique within a database, not globally.
+ * pg_class.relfilenode (NOT pg_class.oid, because we need to be able
+ * to assign new physical files to relations in some situations).
+ * Notice that relNumber is only unique within a database in a particular
+ * tablespace.
*
* Note: spcOid must be GLOBALTABLESPACE_OID if and only if dbOid is
* zero. We support shared relations only in the "global" tablespace.
BackendId backend;
} RelFileLocatorBackend;
-#define SizeOfRelFileLocatorBackend \
- (offsetof(RelFileLocatorBackend, backend) + sizeof(BackendId))
-
#define RelFileLocatorBackendIsTemp(rlocator) \
((rlocator).backend != InvalidBackendId)
typedef struct
{
- /* note: field layout chosen to pack into 20 bytes */
+ /* note: field layout chosen to pack into 16 bytes */
int8 id; /* type field --- must be first */
int8 backend_hi; /* high bits of backend ID, if temprel */
uint16 backend_lo; /* low bits of backend ID, if temprel */
- Oid dbOid;
- Oid spcOid;
- uint32 relNumber_hi; /* avoid 8 byte alignment requirement */
- uint32 relNumber_lo;
+ RelFileLocator rlocator; /* spcOid, dbOid, relNumber */
} SharedInvalSmgrMsg;
#define SHAREDINVALRELMAP_ID (-4)
c.oid = oldoid as orig_oid,
case relfilenode
when 0 then 'none'
+ when c.oid then 'own'
when oldfilenode then 'orig'
- else 'new'
+ else 'OTHER'
end as storage,
obj_description(c.oid, 'pg_class') as desc
from pg_class c left join old_oids using (relname)
relname | orig_oid | storage | desc
------------------------------+----------+---------+---------------
at_partitioned | t | none |
- at_partitioned_0 | t | orig |
- at_partitioned_0_id_name_key | t | orig | child 0 index
- at_partitioned_1 | t | orig |
- at_partitioned_1_id_name_key | t | orig | child 1 index
+ at_partitioned_0 | t | own |
+ at_partitioned_0_id_name_key | t | own | child 0 index
+ at_partitioned_1 | t | own |
+ at_partitioned_1_id_name_key | t | own | child 1 index
at_partitioned_id_name_key | t | none | parent index
(6 rows)
c.oid = oldoid as orig_oid,
case relfilenode
when 0 then 'none'
+ when c.oid then 'own'
when oldfilenode then 'orig'
- else 'new'
+ else 'OTHER'
end as storage,
obj_description(c.oid, 'pg_class') as desc
from pg_class c left join old_oids using (relname)
relname | orig_oid | storage | desc
------------------------------+----------+---------+--------------
at_partitioned | t | none |
- at_partitioned_0 | t | orig |
- at_partitioned_0_id_name_key | f | new | parent index
- at_partitioned_1 | t | orig |
- at_partitioned_1_id_name_key | f | new | parent index
+ at_partitioned_0 | t | own |
+ at_partitioned_0_id_name_key | f | own | parent index
+ at_partitioned_1 | t | own |
+ at_partitioned_1_id_name_key | f | own | parent index
at_partitioned_id_name_key | f | none | parent index
(6 rows)
RETURNS boolean
LANGUAGE plpgsql AS $$
DECLARE
- v_relfilenode int8;
+ v_relfilenode oid;
BEGIN
v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename;
--
SET search_path = fast_default;
CREATE SCHEMA fast_default;
-CREATE TABLE m(id BIGINT);
-INSERT INTO m VALUES (NULL::BIGINT);
+CREATE TABLE m(id OID);
+INSERT INTO m VALUES (NULL::OID);
CREATE FUNCTION set(tabname name) RETURNS VOID
AS $$
BEGIN
NOTICE: checking pg_attribute {attrelid} => pg_class {oid}
NOTICE: checking pg_attribute {atttypid} => pg_type {oid}
NOTICE: checking pg_attribute {attcollation} => pg_collation {oid}
-NOTICE: checking pg_class {relam} => pg_am {oid}
NOTICE: checking pg_class {relnamespace} => pg_namespace {oid}
NOTICE: checking pg_class {reltype} => pg_type {oid}
NOTICE: checking pg_class {reloftype} => pg_type {oid}
NOTICE: checking pg_class {relowner} => pg_authid {oid}
+NOTICE: checking pg_class {relam} => pg_am {oid}
NOTICE: checking pg_class {reltablespace} => pg_tablespace {oid}
NOTICE: checking pg_class {reltoastrelid} => pg_class {oid}
NOTICE: checking pg_class {relrewrite} => pg_class {oid}
c.oid = oldoid as orig_oid,
case relfilenode
when 0 then 'none'
+ when c.oid then 'own'
when oldfilenode then 'orig'
- else 'new'
+ else 'OTHER'
end as storage,
obj_description(c.oid, 'pg_class') as desc
from pg_class c left join old_oids using (relname)
c.oid = oldoid as orig_oid,
case relfilenode
when 0 then 'none'
+ when c.oid then 'own'
when oldfilenode then 'orig'
- else 'new'
+ else 'OTHER'
end as storage,
obj_description(c.oid, 'pg_class') as desc
from pg_class c left join old_oids using (relname)
RETURNS boolean
LANGUAGE plpgsql AS $$
DECLARE
- v_relfilenode int8;
+ v_relfilenode oid;
BEGIN
v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename;
SET search_path = fast_default;
CREATE SCHEMA fast_default;
-CREATE TABLE m(id BIGINT);
-INSERT INTO m VALUES (NULL::BIGINT);
+CREATE TABLE m(id OID);
+INSERT INTO m VALUES (NULL::OID);
CREATE FUNCTION set(tabname name) RETURNS VOID
AS $$