It can be set to either COPY (the default) or CLONE if the system
supports it. CLONE causes callers of copydir(), currently CREATE
DATABASE ... STRATEGY=FILE_COPY and ALTER DATABASE ... SET TABLESPACE =
..., to use copy_file_range (Linux, FreeBSD) or copyfile (macOS) to copy
files instead of a read-write loop over the contents.
CLONE gives the kernel the opportunity to share block ranges on
copy-on-write file systems and push copying down to storage on others,
depending on configuration. On some systems CLONE can be used to clone
large databases quickly with CREATE DATABASE ... TEMPLATE=source
STRATEGY=FILE_COPY.
Other operating systems could be supported; patches welcome.
Co-authored-by: Nazir Bilal Yavuz <[email protected]>
Reviewed-by: Robert Haas <[email protected]>
Reviewed-by: Ranier Vilela <[email protected]>
Discussion: https://postgr.es/m/CA%2BhUKGLM%2Bt%2BSwBU-cHeMUXJCOgBxSHLGZutV5zCwY4qrCcE02w%40mail.gmail.com
</listitem>
</varlistentry>
+ <varlistentry id="guc_file_copy_method" xreflabel="file_copy_method">
+ <term><varname>file_copy_method</varname> (<type>enum</type>)
+ <indexterm>
+ <primary><varname>file_copy_method</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies the method used to copy files.
+ Possible values are <literal>COPY</literal> (default) and
+ <literal>CLONE</literal> (if operating system support is available).
+ </para>
+
+ <para>
+ This parameter affects:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ <literal><command>CREATE DATABASE ... STRATEGY=FILE_COPY</command></literal>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <command>ALTER DATABASE ... SET TABLESPACE ...</command>
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ <literal>CLONE</literal> uses the <function>copy_file_range()</function>
+ (Linux, FreeBSD) or <function>copyfile()</function>
+ (macOS) system calls, giving the kernel the opportunity to share disk
+ blocks or push work down to lower layers on some file systems.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
<term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
<indexterm>
default tablespace to the new tablespace. The new default tablespace
must be empty for this database, and no one can be connected to
the database. Tables and indexes in non-default tablespaces are
- unaffected.
+ unaffected. The method used to copy files to the new tablespace
+ is affected by the <xref linkend="guc_file_copy_method"/> setting.
</para>
<para>
log volume substantially, especially if the template database is large,
it also forces the system to perform a checkpoint both before and
after the creation of the new database. In some situations, this may
- have a noticeable negative impact on overall system performance.
+ have a noticeable negative impact on overall system performance. The
+ <literal>FILE_COPY</literal> strategy is affected by the <xref
+ linkend="guc_file_copy_method"/> setting.
</para>
</listitem>
</varlistentry>
#include "postgres.h"
+#ifdef HAVE_COPYFILE_H
+#include <copyfile.h>
+#endif
#include <fcntl.h>
#include <unistd.h>
#include "storage/copydir.h"
#include "storage/fd.h"
+/* GUCs: file_copy_method makes copydir() use copy_file() or clone_file() */
+int file_copy_method = FILE_COPY_METHOD_COPY;
+
+static void clone_file(const char *fromfile, const char *tofile);
+
/*
* copydir: copy a directory
*
* If recurse is false, subdirectories are ignored. Anything that's not
* a directory or a regular file is ignored.
+ *
+ * This function uses the file_copy_method GUC. New uses of this function must
+ * be documented in doc/src/sgml/config.sgml.
*/
void
copydir(const char *fromdir, const char *todir, bool recurse)
copydir(fromfile, tofile, true);
}
else if (xlde_type == PGFILETYPE_REG)
- copy_file(fromfile, tofile);
+ {
+ if (file_copy_method == FILE_COPY_METHOD_CLONE)
+ clone_file(fromfile, tofile);
+ else
+ copy_file(fromfile, tofile);
+ }
}
FreeDir(xldir);
pfree(buffer);
}
+
+/*
+ * clone one file
+ *
+ * Copies fromfile to tofile with an operating-system copy primitive instead
+ * of a read-write loop: copyfile() with COPYFILE_CLONE_FORCE where that is
+ * available, otherwise copy_file_range() in chunks of at most 1MB.
+ *
+ * In the copy_file_range() path the destination is opened with
+ * O_CREAT | O_EXCL, so it must not already exist.  Any failure to open,
+ * clone or close a file is reported with ereport(ERROR).
+ *
+ * Must only be called in builds that provide one of the two primitives; the
+ * fallback branch is pg_unreachable().
+ */
+static void
+clone_file(const char *fromfile, const char *tofile)
+{
+#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
+ if (copyfile(fromfile, tofile, NULL, COPYFILE_CLONE_FORCE) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not clone file \"%s\" to \"%s\": %m",
+ fromfile, tofile)));
+#elif defined(HAVE_COPY_FILE_RANGE)
+ int srcfd;
+ int dstfd;
+ ssize_t nbytes;
+
+ srcfd = OpenTransientFile(fromfile, O_RDONLY | PG_BINARY);
+ if (srcfd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", fromfile)));
+
+ dstfd = OpenTransientFile(tofile, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY);
+ if (dstfd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m", tofile)));
+
+ do
+ {
+ /*
+ * Don't copy too much at once, so we can check for interrupts from
+ * time to time if it falls back to a slow copy.
+ *
+ * The NULL offset arguments make copy_file_range() use and advance
+ * each descriptor's own file position, so successive calls continue
+ * where the previous one stopped.  A return of 0 ends the loop; a
+ * failure with EINTR is not reported and the call is simply retried.
+ */
+ CHECK_FOR_INTERRUPTS();
+ pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_COPY);
+ nbytes = copy_file_range(srcfd, NULL, dstfd, NULL, 1024 * 1024, 0);
+ if (nbytes < 0 && errno != EINTR)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not clone file \"%s\" to \"%s\": %m",
+ fromfile, tofile)));
+ pgstat_report_wait_end();
+ }
+ while (nbytes != 0);
+
+ if (CloseTransientFile(dstfd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m", tofile)));
+
+ if (CloseTransientFile(srcfd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close file \"%s\": %m", fromfile)));
+#else
+ /* If there is no CLONE support this function should not be called. */
+ pg_unreachable();
+#endif
+}
CONTROL_FILE_SYNC_UPDATE "Waiting for an update to the <filename>pg_control</filename> file to reach durable storage."
CONTROL_FILE_WRITE "Waiting for a write to the <filename>pg_control</filename> file."
CONTROL_FILE_WRITE_UPDATE "Waiting for a write to update the <filename>pg_control</filename> file."
+COPY_FILE_COPY "Waiting for a file copy operation."
COPY_FILE_READ "Waiting for a read during a file copy operation."
COPY_FILE_WRITE "Waiting for a write during a file copy operation."
DATA_FILE_EXTEND "Waiting for a relation data file to be extended."
*/
#include "postgres.h"
+#ifdef HAVE_COPYFILE_H
+#include <copyfile.h>
+#endif
#include <float.h>
#include <limits.h>
#ifdef HAVE_SYSLOG
#include "storage/aio.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
+#include "storage/copydir.h"
#include "storage/io_worker.h"
#include "storage/large_object.h"
#include "storage/pg_shmem.h"
{NULL, 0, false}
};
+static const struct config_enum_entry file_copy_method_options[] = { /* accepted values of file_copy_method */
+ {"copy", FILE_COPY_METHOD_COPY, false}, /* offered unconditionally */
+#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE) || defined(HAVE_COPY_FILE_RANGE) /* same conditions under which clone_file() has an implementation */
+ {"clone", FILE_COPY_METHOD_CLONE, false}, /* offered only when a clone primitive was detected */
+#endif
+ {NULL, 0, false}
+};
+
/*
* Options for enum values stored in other modules
*/
NULL, NULL, NULL
},
+ {
+ {"file_copy_method", PGC_USERSET, RESOURCES_DISK,
+ gettext_noop("Selects the file copy method."),
+ NULL
+ },
+ &file_copy_method,
+ FILE_COPY_METHOD_COPY, file_copy_method_options,
+ NULL, NULL, NULL
+ },
+
{
{"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS,
gettext_noop("Selects the method used for forcing WAL updates to disk."),
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
# for NOTIFY / LISTEN queue
+#file_copy_method = copy # the default is the first option
+ # copy
+ # clone (if system support is available)
+
# - Kernel Resources -
#max_files_per_process = 1000 # min 64
#ifndef COPYDIR_H
#define COPYDIR_H
+typedef enum FileCopyMethod /* values of the file_copy_method GUC */
+{
+ FILE_COPY_METHOD_COPY, /* copydir() copies regular files with copy_file(); the default */
+ FILE_COPY_METHOD_CLONE, /* copydir() uses copy_file_range() or copyfile() instead */
+} FileCopyMethod;
+
+/* GUC parameters */
+extern PGDLLIMPORT int file_copy_method;
+
extern void copydir(const char *fromdir, const char *todir, bool recurse);
extern void copy_file(const char *fromfile, const char *tofile);
FieldStore
File
FileBackupMethod
+FileCopyMethod
FileFdwExecutionState
FileFdwPlanState
FileNameMap