Add file_extend_method=posix_fallocate,write_zeros.

Provide a way to disable the use of posix_fallocate() for relation
files.  It was introduced by commit 4d330a61bb1.  The new setting
file_extend_method=write_zeros can be used as a workaround for problems
reported from the field:

 * BTRFS compression is disabled by the use of posix_fallocate()
 * XFS could produce spurious ENOSPC errors in some Linux kernel
   versions, though that problem is reported to have been fixed

The default is file_extend_method=posix_fallocate if available, as
before.  The write_zeros option is similar to the behavior of PostgreSQL
releases before 16, except that it is now multi-block.

Backpatch-through: 16
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reported-by: Dimitrios Apostolou <jimis@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
This commit is contained in:
Thomas Munro
2025-05-31 22:50:22 +12:00
parent 221232596f
commit e37b598028
6 changed files with 90 additions and 5 deletions

View File

@ -2210,6 +2210,42 @@ include_dir 'conf.d'
</listitem>
</varlistentry>
<varlistentry id="guc-file-extend-method" xreflabel="file_extend_method">
<term><varname>file_extend_method</varname> (<type>enum</type>)
<indexterm>
<primary><varname>file_extend_method</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Specifies the method used to extend data files during bulk operations
such as <command>COPY</command>. The first available option is used as
the default, depending on the operating system:
<itemizedlist>
<listitem>
<para>
<literal>posix_fallocate</literal> (Unix) uses the standard POSIX
interface for allocating disk space, but is missing on some systems.
If it is present but the underlying file system doesn't support it,
this option silently falls back to <literal>write_zeros</literal>.
Current versions of BTRFS are known to disable compression when
this option is used.
This is the default on systems that have the function.
</para>
</listitem>
<listitem>
<para>
<literal>write_zeros</literal> extends files by writing out blocks
of zero bytes. This is the default on systems that don't have the
function <function>posix_fallocate</function>.
</para>
</listitem>
</itemizedlist>
The <literal>write_zeros</literal> method is always used when data
files are extended by 8 blocks or fewer.
</para>
</listitem>
</varlistentry>
</variablelist>
</sect2>

View File

@ -164,6 +164,9 @@ bool data_sync_retry = false;
/* How SyncDataDirectory() should do its job. */
int recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC;
/*
 * How data files should be bulk-extended with zeros.  Holds one of the
 * FILE_EXTEND_METHOD_* values and starts out as DEFAULT_FILE_EXTEND_METHOD.
 */
int file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
/* Which kinds of files should be opened with PG_O_DIRECT. */
int io_direct_flags;

View File

@ -577,13 +577,24 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
* that decision should be made though? For now just use a cutoff of
* 8, anything between 4 and 8 worked OK in some local testing.
*/
if (numblocks > 8)
if (numblocks > 8 &&
file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
{
int ret;
int ret = 0;
ret = FileFallocate(v->mdfd_vfd,
seekpos, (off_t) BLCKSZ * numblocks,
WAIT_EVENT_DATA_FILE_EXTEND);
#ifdef HAVE_POSIX_FALLOCATE
if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE)
{
ret = FileFallocate(v->mdfd_vfd,
seekpos, (off_t) BLCKSZ * numblocks,
WAIT_EVENT_DATA_FILE_EXTEND);
}
else
#endif
{
elog(ERROR, "unsupported file_extend_method: %d",
file_extend_method);
}
if (ret != 0)
{
ereport(ERROR,

View File

@ -66,6 +66,7 @@
#include "replication/slot.h"
#include "replication/syncrep.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/large_object.h"
#include "storage/pg_shmem.h"
#include "storage/predicate.h"
@ -470,6 +471,14 @@ static const struct config_enum_entry wal_compression_options[] = {
{NULL, 0, false}
};
/*
 * Accepted values for file_extend_method.  "posix_fallocate" is only offered
 * in builds that define HAVE_POSIX_FALLOCATE; "write_zeros" is always
 * offered.
 */
static const struct config_enum_entry file_extend_method_options[] = {
#ifdef HAVE_POSIX_FALLOCATE
{"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false},
#endif
{"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false},
{NULL, 0, false}
};
/*
* Options for enum values stored in other modules
*/
@ -4847,6 +4856,16 @@ struct config_enum ConfigureNamesEnum[] =
NULL, NULL, NULL
},
{
{"file_extend_method", PGC_SIGHUP, RESOURCES_DISK,
gettext_noop("Selects the method used for extending data files."),
NULL
},
&file_extend_method,
DEFAULT_FILE_EXTEND_METHOD, file_extend_method_options,
NULL, NULL, NULL
},
{
{"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS,
gettext_noop("Selects the method used for forcing WAL updates to disk."),

View File

@ -167,6 +167,11 @@
#temp_file_limit = -1 # limits per-process temp file space
# in kilobytes, or -1 for no limit
#file_extend_method = posix_fallocate # the default is the first option supported
# by the operating system:
# posix_fallocate (most Unix-like systems)
# write_zeros
# - Kernel Resources -
#max_files_per_process = 1000 # min 64

View File

@ -59,12 +59,23 @@ typedef int File;
#define IO_DIRECT_WAL 0x02
#define IO_DIRECT_WAL_INIT 0x04
/*
 * Methods for extending data files, as selected by file_extend_method.
 *
 * FILE_EXTEND_METHOD_POSIX_FALLOCATE exists only in builds that define
 * HAVE_POSIX_FALLOCATE, so the numeric value of
 * FILE_EXTEND_METHOD_WRITE_ZEROS differs between builds (1 with it, 0
 * without it).
 */
enum FileExtendMethod
{
#ifdef HAVE_POSIX_FALLOCATE
FILE_EXTEND_METHOD_POSIX_FALLOCATE,
#endif
FILE_EXTEND_METHOD_WRITE_ZEROS,
};
/*
 * Default to the first available file_extend_method.  This works because
 * whichever enumerator comes first in enum FileExtendMethod for this build
 * has the value 0.
 */
#define DEFAULT_FILE_EXTEND_METHOD 0
/* GUC parameters */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
extern PGDLLIMPORT int recovery_init_sync_method;
extern PGDLLIMPORT int io_direct_flags;
extern PGDLLIMPORT int file_extend_method;	/* a FILE_EXTEND_METHOD_* value */
/*
* This is private to fd.c, but exported for save/restore_backend_variables()