Handle DROP DATABASE getting interrupted

Until now, when DROP DATABASE got interrupted in the wrong moment, the removal
of the pg_database row would also roll back, even though some irreversible
steps have already been taken. E.g. DropDatabaseBuffers() might have thrown
out dirty buffers, or files could have been unlinked. But we continued to
allow connections to such a corrupted database.

To fix this, mark databases invalid with an in-place update, just before
starting to perform irreversible steps. As we can't add a new column in the
back branches, we use pg_database.datconnlimit = -2 for this purpose.

An invalid database cannot be connected to anymore, but can still be
dropped.

Unfortunately we can't easily add output to psql's \l to indicate that some
database is invalid, it doesn't fit in any of the existing columns.

Add tests verifying that a interrupted DROP DATABASE is handled correctly in
the backend and in various tools.

Reported-by: Evgeny Morozov <postgresql3@realityexists.net>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Daniel Gustafsson <daniel@yesql.se>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/20230509004637.cgvmfwrbht7xm7p6@awork3.anarazel.de
Discussion: https://postgr.es/m/20230314174521.74jl6ffqsee5mtug@awork3.anarazel.de
Backpatch: 11-, bug present in all supported versions
This commit is contained in:
Andres Freund
2023-07-13 13:03:31 -07:00
parent 82e97b8640
commit f66403749d
19 changed files with 444 additions and 28 deletions

View File

@ -234,7 +234,9 @@ cluster_all_databases(ConnParams *cparams, const char *progname,
int i;
conn = connectMaintenanceDatabase(cparams, progname, echo);
result = executeQuery(conn, "SELECT datname FROM pg_database WHERE datallowconn ORDER BY 1;", echo);
result = executeQuery(conn,
"SELECT datname FROM pg_database WHERE datallowconn AND datconnlimit <> -2 ORDER BY 1;",
echo);
PQfinish(conn);
for (i = 0; i < PQntuples(result); i++)

View File

@ -730,7 +730,9 @@ reindex_all_databases(ConnParams *cparams,
int i;
conn = connectMaintenanceDatabase(cparams, progname, echo);
result = executeQuery(conn, "SELECT datname FROM pg_database WHERE datallowconn ORDER BY 1;", echo);
result = executeQuery(conn,
"SELECT datname FROM pg_database WHERE datallowconn AND datconnlimit <> -2 ORDER BY 1;",
echo);
PQfinish(conn);
for (i = 0; i < PQntuples(result); i++)

View File

@ -21,4 +21,18 @@ $node->issues_sql_like(
qr/statement: CLUSTER.*statement: CLUSTER/s,
'cluster all databases');
$node->safe_psql(
'postgres', q(
CREATE DATABASE regression_invalid;
UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';
));
$node->command_ok([ 'clusterdb', '-a' ],
'invalid database not targeted by clusterdb -a');
# Doesn't quite belong here, but don't want to waste time by creating an
# invalid database in 010_clusterdb.pl as well.
$node->command_fails_like([ 'clusterdb', '-d', 'regression_invalid'],
qr/FATAL: cannot connect to invalid database "regression_invalid"/,
'clusterdb cannot target invalid database');
done_testing();

View File

@ -31,4 +31,13 @@ $node->issues_sql_like(
$node->command_fails([ 'dropdb', 'nonexistent' ],
'fails with nonexistent database');
# check that invalid database can be dropped with dropdb
$node->safe_psql(
'postgres', q(
CREATE DATABASE regression_invalid;
UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';
));
$node->command_ok([ 'dropdb', 'regression_invalid' ],
'invalid database can be dropped');
done_testing();

View File

@ -18,4 +18,18 @@ $node->issues_sql_like(
qr/statement: REINDEX.*statement: REINDEX/s,
'reindex all databases');
$node->safe_psql(
'postgres', q(
CREATE DATABASE regression_invalid;
UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';
));
$node->command_ok([ 'reindexdb', '-a' ],
'invalid database not targeted by reindexdb -a');
# Doesn't quite belong here, but don't want to waste time by creating an
# invalid database in 090_reindexdb.pl as well.
$node->command_fails_like([ 'reindexdb', '-d', 'regression_invalid'],
qr/FATAL: cannot connect to invalid database "regression_invalid"/,
'reindexdb cannot target invalid database');
done_testing();

View File

@ -16,4 +16,18 @@ $node->issues_sql_like(
qr/statement: VACUUM.*statement: VACUUM/s,
'vacuum all databases');
$node->safe_psql(
'postgres', q(
CREATE DATABASE regression_invalid;
UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';
));
$node->command_ok([ 'vacuumdb', '-a' ],
'invalid database not targeted by vacuumdb -a');
# Doesn't quite belong here, but don't want to waste time by creating an
# invalid database in 010_vacuumdb.pl as well.
$node->command_fails_like([ 'vacuumdb', '-d', 'regression_invalid'],
qr/FATAL: cannot connect to invalid database "regression_invalid"/,
'vacuumdb cannot target invalid database');
done_testing();

View File

@ -741,7 +741,7 @@ vacuum_all_databases(ConnParams *cparams,
conn = connectMaintenanceDatabase(cparams, progname, echo);
result = executeQuery(conn,
"SELECT datname FROM pg_database WHERE datallowconn ORDER BY 1;",
"SELECT datname FROM pg_database WHERE datallowconn AND datconnlimit <> -2 ORDER BY 1;",
echo);
PQfinish(conn);