Files
openGauss-server/src/gausskernel/process/postmaster/bgwriter.cpp
2022-09-07 09:35:18 +08:00

810 lines
30 KiB
C++
Executable File

/*
*
* bgwriter.cpp
*
* The background writer (bgwriter) is new as of Postgres 8.0. It attempts
* to keep regular backends from having to write out dirty shared buffers
* (which they would only do when needing to free a shared buffer to read in
* another page). In the best scenario all writes from shared buffers will
* be issued by the background writer process. However, regular backends are
* still empowered to issue writes if the bgwriter fails to maintain enough
* clean shared buffers.
*
* As of Postgres 9.2 the bgwriter no longer handles checkpoints.
*
* The bgwriter is started by the postmaster as soon as the startup subprocess
* finishes, or as soon as recovery begins if we are doing archive recovery.
* It remains alive until the postmaster commands it to terminate.
* Normal termination is by SIGTERM, which instructs the bgwriter to exit(0).
* Emergency termination is by SIGQUIT; like any backend, the bgwriter will
* simply abort and exit on SIGQUIT.
*
* If the bgwriter exits unexpectedly, the postmaster treats that the same
* as a backend crash: shared memory may be corrupted, so remaining backends
* should be killed by SIGQUIT and then a recovery cycle started.
*
*
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* src/gausskernel/process/postmaster/bgwriter.cpp
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "knl/knl_variable.h"
#include <signal.h>
#include <sys/time.h>
#include "access/xlog_internal.h"
#include "access/double_write.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/pagewriter.h"
#include "storage/buf/bufmgr.h"
#include "storage/ipc.h"
#include "storage/lock/lwlock.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/smgr/smgr.h"
#include "storage/spin.h"
#include "storage/standby.h"
#include "utils/guc.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/resowner.h"
#include "utils/timestamp.h"
#include "gssignal/gs_signal.h"
#include "replication/slot.h"
/*
 * Multiplier applied to BgWriterDelay for the extended "hibernation" sleep
 * in BackgroundWriterMain. (Perhaps this needs to be configurable?)
 */
#define HIBERNATE_FACTOR 50
/*
 * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid
 * doing so too often or repeatedly if there has been no other write activity
 * in the system. last_snapshot_ts is seeded when BackgroundWriterMain
 * starts; last_snapshot_lsn starts out invalid.
 */
static TimestampTz last_snapshot_ts;
static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr;
/* Signal handlers (defined later in this file) */
static void bgwriter_quickdie(SIGNAL_ARGS);
static void bgwriter_sighup_handler(SIGNAL_ARGS);
static void bgwriter_request_shutdown_handler(SIGNAL_ARGS);
static void bgwriter_sigusr1_handler(SIGNAL_ARGS);
/* NOTE(review): not referenced in the visible part of this file -- confirm this declaration is still needed. */
extern void write_term_log(uint32 term);
/* Size in bytes of the thread-name buffer built in invalid_buffer_bgwriter_main. */
const int MAX_THREAD_NAME_LEN = 128;
/*
 * Register the signal dispositions shared by both bgwriter threads
 * (BackgroundWriterMain and invalid_buffer_bgwriter_main).
 *
 * Takes no arguments and returns nothing; its only effects are the
 * gspqsignal() registrations below and removing SIGQUIT from
 * t_thrd.libpq_cxt.BlockSig.
 */
static void setup_bgwriter_signalhook(void)
{
/*
 * Signals this thread acts on or explicitly ignores.
 */
(void)gspqsignal(SIGHUP, bgwriter_sighup_handler); /* set flag to read config file */
(void)gspqsignal(SIGINT, SIG_IGN);
(void)gspqsignal(SIGTERM, bgwriter_request_shutdown_handler); /* shutdown */
(void)gspqsignal(SIGQUIT, bgwriter_quickdie); /* hard crash time */
(void)gspqsignal(SIGALRM, SIG_IGN);
(void)gspqsignal(SIGPIPE, SIG_IGN);
(void)gspqsignal(SIGUSR1, bgwriter_sigusr1_handler);
(void)gspqsignal(SIGUSR2, SIG_IGN);
(void)gspqsignal(SIGURG, print_stack);
/*
 * Reset some signals that are accepted by postmaster but not here
 */
(void)gspqsignal(SIGCHLD, SIG_DFL);
(void)gspqsignal(SIGTTIN, SIG_DFL);
(void)gspqsignal(SIGTTOU, SIG_DFL);
(void)gspqsignal(SIGCONT, SIG_DFL);
(void)gspqsignal(SIGWINCH, SIG_DFL);
/* We allow SIGQUIT (quickdie) at all times */
sigdelset(&t_thrd.libpq_cxt.BlockSig, SIGQUIT);
}
/*
 * Error-recovery path for the bgwriter threads. It is called from the
 * sigsetjmp() landing block of BackgroundWriterMain and of
 * invalid_buffer_bgwriter_main.
 *
 * wb_context:   writeback context; re-initialized here before work resumes.
 * bgwriter_cxt: the thread's working memory context; made current and then
 *               reset (including its children) here.
 *
 * The function reports the pending error, releases the resources listed
 * below, clears the error state and finally sleeps for one second before
 * returning to the caller.
 */
static void bgwriter_handle_exceptions(WritebackContext *wb_context, MemoryContext bgwriter_cxt)
{
/*
 * NOTE(review): the paragraph below is not followed by an explicit
 * close-all-files call anywhere in this function; presumably it was carried
 * over from code where it preceded such a call -- confirm, then relocate or
 * remove it.
 *
 * Close all open files after any error. This is helpful on Windows,
 * where holding deleted files open causes various strange errors.
 * It's not clear we need it elsewhere, but shouldn't hurt.
 */
/* Since not using PG_TRY, must reset error stack by hand */
t_thrd.log_cxt.error_context_stack = NULL;
t_thrd.log_cxt.call_stack = NULL;
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/* abort async io, must before LWlock release */
AbortAsyncListIO();
/* release resource held by lsc */
AtEOXact_SysDBCache(false);
/*
 * These operations are really just a minimal subset of
 * AbortTransaction(). We don't have very many resources to worry
 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
 */
LWLockReleaseAll();
AbortBufferIO();
UnlockBuffers();
/* buffer pins are released here: */
ResourceOwnerRelease(t_thrd.utils_cxt.CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true);
/* we needn't bother with the other ResourceOwnerRelease phases */
AtEOXact_Buffers(false);
AtEOXact_SMgr();
AtEOXact_Files();
AtEOXact_HashTables(false);
/*
 * release compression ctx; BackgroundWriterMain calls crps_create_ctxs()
 * again after its sigsetjmp() block.
 */
crps_destory_ctxs();
/*
 * Now return to normal top-level context and clear ErrorContext for
 * next time.
 */
(void)MemoryContextSwitchTo(bgwriter_cxt);
FlushErrorState();
/* Flush any leaked data in the top-level context */
MemoryContextResetAndDeleteChildren(bgwriter_cxt);
/* re-initialize to avoid repeated errors causing problems */
WritebackContextInit(wb_context, &u_sess->attr.attr_storage.bgwriter_flush_after);
/* Now we can allow interrupts again */
RESUME_INTERRUPTS();
/*
 * Sleep at least 1 second after any error. A write error is likely
 * to be repeated, and we don't want to be filling the error logs as
 * fast as we can.
 */
pg_usleep(1000000L);
return;
}
/*
 * Main entry point for bgwriter process
 *
 * This is invoked from AuxiliaryProcessMain, which has already created the
 * basic execution environment, but not enabled signals yet.
 *
 * Outline: install signal handlers, create a resource owner and a private
 * memory context, arm the sigsetjmp() error-recovery point, then loop:
 * process pending SIGHUP/shutdown flags, run one BgBufferSync() cycle, send
 * statistics, optionally log a standby snapshot, and sleep on the process
 * latch. The loop contains no break; the function is left only through
 * proc_exit(0) on a shutdown request or gs_thread_exit(1) on postmaster
 * death.
 */
void BackgroundWriterMain(void)
{
sigjmp_buf local_sigjmp_buf;
MemoryContext bgwriter_context;
bool prev_hibernate = false;
WritebackContext wb_context;
t_thrd.role = BGWRITER;
ereport(LOG, (errmsg("bgwriter started")));
setup_bgwriter_signalhook();
/*
 * We just started, assume there has been either a shutdown or
 * end-of-recovery snapshot.
 */
last_snapshot_ts = GetCurrentTimestamp();
/*
 * Create a resource owner to keep track of our resources (currently only
 * buffer pins).
 */
t_thrd.utils_cxt.CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer",
THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE));
/*
 * Create a memory context that we will do all our work in. We do this so
 * that we can reset the context during error recovery and thereby avoid
 * possible memory leaks. Formerly this code just ran in
 * t_thrd.top_mem_cxt, but resetting that would be a really bad idea.
 */
bgwriter_context = AllocSetContextCreate(t_thrd.top_mem_cxt,
"Background Writer",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(bgwriter_context);
WritebackContextInit(&wb_context, &u_sess->attr.attr_storage.bgwriter_flush_after);
/*
 * If an exception is encountered, processing resumes here.
 *
 * See notes in postgres.c about the design of this coding.
 *
 * NOTE(review): oldTryCounter (and prev_hibernate) are non-volatile locals
 * that are modified after sigsetjmp() and oldTryCounter is read again in
 * the landing block below; per the C rules for setjmp/longjmp such values
 * are indeterminate after the jump -- consider qualifying them volatile.
 */
int curTryCounter;
int* oldTryCounter = NULL;
if (sigsetjmp(local_sigjmp_buf, 1) != 0) {
gstrace_tryblock_exit(true, oldTryCounter);
bgwriter_handle_exceptions(&wb_context, bgwriter_context);
/* Report wait end here, when there is no further possibility of wait */
pgstat_report_waitevent(WAIT_EVENT_END);
}
oldTryCounter = gstrace_tryblock_entry(&curTryCounter);
/* We can now handle ereport(ERROR) */
t_thrd.log_cxt.PG_exception_stack = &local_sigjmp_buf;
/*
 * Unblock signals (they were blocked when the postmaster forked us)
 */
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
(void)gs_signal_unblock_sigusr2();
/*
 * Use the recovery target timeline ID during recovery
 */
if (RecoveryInProgress())
t_thrd.xlog_cxt.ThisTimeLineID = GetRecoveryTargetTLI();
/*
 * Reset hibernation state after any error.
 */
prev_hibernate = false;
/*
 * init compression ctx for page compression; this line is also reached
 * after error recovery, where bgwriter_handle_exceptions() has destroyed
 * the previous contexts.
 */
crps_create_ctxs(t_thrd.role);
pgstat_report_appname("Background writer");
pgstat_report_activity(STATE_IDLE, NULL);
/*
 * Loop forever
 */
for (;;) {
bool can_hibernate = false;
int rc;
/*
 * When double write is disabled, pg_dw_meta will be created with
 * dw_file_num = 0, so this handles the upgrade process. bgwriter runs
 * when enable_incremental_checkpoint = off.
 */
if (pg_atomic_read_u32(&g_instance.dw_batch_cxt.dw_version) < DW_SUPPORT_REABLE_DOUBLE_WRITE
&& t_thrd.proc->workingVersionNum >= DW_SUPPORT_REABLE_DOUBLE_WRITE) {
dw_upgrade_renable_double_write();
}
/* Clear any already-pending wakeups */
ResetLatch(&t_thrd.proc->procLatch);
pgstat_report_activity(STATE_RUNNING, NULL);
/* Flag set by bgwriter_sighup_handler: re-read the configuration file */
if (t_thrd.bgwriter_cxt.got_SIGHUP) {
t_thrd.bgwriter_cxt.got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
}
/* Flag set by bgwriter_request_shutdown_handler (SIGTERM) */
if (t_thrd.bgwriter_cxt.shutdown_requested) {
/*
 * From here on, elog(ERROR) should end with exit(1), not send
 * control back to the sigsetjmp block above
 */
u_sess->attr.attr_common.ExitOnAnyError = true;
/* Normal exit from the bgwriter is here */
/* release compression ctx */
crps_destory_ctxs();
proc_exit(0); /* done */
}
/*
 * Do one cycle of dirty-buffer writing. The result is used below to
 * decide whether the longer "hibernation" sleep is allowed.
 */
can_hibernate = BgBufferSync(&wb_context);
/*
 * Send off activity statistics to the stats collector
 */
pgstat_send_bgwriter();
if (FirstCallSinceLastCheckpoint()) {
/*
 * After any checkpoint, close all smgr files. This is so we
 * won't hang onto smgr references to deleted files indefinitely.
 */
smgrcloseall();
}
/*
 * Log a new xl_running_xacts every now and then so replication can get
 * into a consistent state faster (think of suboverflowed snapshots)
 * and clean up resources (locks, KnownXids*) more frequently. The
 * costs of this are relatively low, so doing it 4 times
 * (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
 *
 * We assume the interval for writing xl_running_xacts is
 * significantly bigger than BgWriterDelay, so we don't complicate the
 * overall timeout handling but just assume we're going to get called
 * often enough even if hibernation mode is active. It's not that
 * important that LOG_SNAPSHOT_INTERVAL_MS is met strictly. To make sure
 * we're not waking the disk up unnecessarily on an idle system we
 * check whether there has been any WAL inserted since the last time
 * we've logged a running xacts.
 *
 * We do this logging in the bgwriter as it's the only process that's
 * run regularly and returns to its mainloop all the
 * time. E.g. Checkpointer, when active, is barely ever in its
 * mainloop and thus makes it hard to log regularly.
 */
if (XLogStandbyInfoActive() && !RecoveryInProgress()) {
TimestampTz timeout = 0;
TimestampTz now = GetCurrentTimestamp();
timeout = TimestampTzPlusMilliseconds(last_snapshot_ts, LOG_SNAPSHOT_INTERVAL_MS);
/*
 * only log if enough time has passed and some xlog record has been
 * inserted.
 */
if (now >= timeout && !XLByteEQ(last_snapshot_lsn, GetXLogInsertRecPtr())) {
last_snapshot_lsn = LogStandbySnapshot();
last_snapshot_ts = now;
}
/*
 * LogCheckSlot() is called whenever the interval has elapsed, whether
 * or not a snapshot was logged above. Note that last_snapshot_ts is
 * only advanced when a snapshot is logged.
 */
if (now >= timeout) {
LogCheckSlot();
}
}
/*
 * Sleep until we are signaled or BgWriterDelay has elapsed.
 *
 * Note: the feedback control loop in BgBufferSync() expects that we
 * will call it every BgWriterDelay msec. While it's not critical for
 * correctness that that be exact, the feedback loop might misbehave
 * if we stray too far from that. Hence, avoid loading this process
 * down with latch events that are likely to happen frequently during
 * normal operation.
 */
pgstat_report_activity(STATE_IDLE, NULL);
rc = WaitLatch(&t_thrd.proc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
u_sess->attr.attr_storage.BgWriterDelay /* ms */);
/*
 * If no latch event and BgBufferSync says nothing's happening, extend
 * the sleep in "hibernation" mode, where we sleep for much longer
 * than bgwriter_delay says. Fewer wakeups save electricity. When a
 * backend starts using buffers again, it will wake us up by setting
 * our latch. Because the extra sleep will persist only as long as no
 * buffer allocations happen, this should not distort the behavior of
 * BgBufferSync's control loop too badly; essentially, it will think
 * that the system-wide idle interval didn't exist.
 *
 * There is a race condition here, in that a backend might allocate a
 * buffer between the time BgBufferSync saw the alloc count as zero
 * and the time we call StrategyNotifyBgWriter. While it's not
 * critical that we not hibernate anyway, we try to reduce the odds of
 * that by only hibernating when BgBufferSync says nothing's happening
 * for two consecutive cycles. Also, we mitigate any possible
 * consequences of a missed wakeup by not hibernating forever.
 */
if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate) {
/* Ask for notification at next buffer allocation */
StrategyNotifyBgWriter(t_thrd.proc->pgprocno);
/* Sleep for BgWriterDelay * HIBERNATE_FACTOR ... */
rc = WaitLatch(&t_thrd.proc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
u_sess->attr.attr_storage.BgWriterDelay * HIBERNATE_FACTOR);
/* Reset the notification request in case we timed out */
StrategyNotifyBgWriter(-1);
}
/*
 * Emergency bailout if postmaster has died. This is to avoid the
 * necessity for manual cleanup of all postmaster children.
 */
if (rc & WL_POSTMASTER_DEATH) {
/* release compression ctx */
crps_destory_ctxs();
gs_thread_exit(1);
}
/* Remember this cycle's verdict; hibernation needs two idle cycles in a row */
prev_hibernate = can_hibernate;
}
}
/* --------------------------------
 * signal handler routines
 * --------------------------------
 */
/*
 * bgwriter_quickdie() occurs when signalled SIGQUIT by the postmaster.
 *
 * Some backend has bought the farm,
 * so we need to stop what we're doing and exit.
 *
 * Registered for SIGQUIT in setup_bgwriter_signalhook(). Does not return:
 * it ends in exit(2).
 */
static void bgwriter_quickdie(SIGNAL_ARGS)
{
/* Apply the BlockSig mask while we shut down */
gs_signal_setmask(&t_thrd.libpq_cxt.BlockSig, NULL);
/*
 * We DO NOT want to run proc_exit() callbacks -- we're here because
 * shared memory may be corrupted, so we don't want to try to clean up our
 * transaction. Just nail the windows shut and get out of town. Now that
 * there's an atexit callback to prevent third-party code from breaking
 * things by calling exit() directly, we have to reset the callbacks
 * explicitly to make this work as intended.
 */
on_exit_reset();
/*
 * Note we do exit(2) not exit(0). This is to force the postmaster into a
 * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
 * backend. This is necessary precisely because we don't clean up our
 * shared memory state. (The "dead man switch" mechanism in pmsignal.c
 * should ensure the postmaster sees this as a crash, too, but no harm in
 * being doubly sure.)
 */
/*
 * release compression ctx
 * NOTE(review): this cleanup runs inside the signal handler even though the
 * comments above argue for doing no cleanup here -- confirm that
 * crps_destory_ctxs() is safe to call in this context.
 */
crps_destory_ctxs();
exit(2);
}
/*
 * SIGHUP handler: record that the configuration file must be re-read.
 *
 * Only raises t_thrd.bgwriter_cxt.got_SIGHUP and, when this thread has a
 * PGPROC, sets its latch so the main loop wakes up and notices the flag.
 * errno is preserved across the handler.
 */
static void bgwriter_sighup_handler(SIGNAL_ARGS)
{
    const int saved_errno = errno;

    t_thrd.bgwriter_cxt.got_SIGHUP = true;

    if (t_thrd.proc != NULL) {
        SetLatch(&t_thrd.proc->procLatch);
    }

    errno = saved_errno;
}
/*
 * SIGTERM handler: request an orderly shutdown.
 *
 * Raises both t_thrd.bgwriter_cxt.shutdown_requested and
 * t_thrd.int_cxt.ProcDiePending, then sets this thread's latch (if it has a
 * PGPROC) so the main loop wakes up. errno is preserved across the handler.
 */
static void bgwriter_request_shutdown_handler(SIGNAL_ARGS)
{
    const int saved_errno = errno;

    t_thrd.bgwriter_cxt.shutdown_requested = true;
    t_thrd.int_cxt.ProcDiePending = true;

    if (t_thrd.proc != NULL) {
        SetLatch(&t_thrd.proc->procLatch);
    }

    errno = saved_errno;
}
/*
 * SIGUSR1 handler: used for latch wakeups.
 *
 * Delegates to latch_sigusr1_handler() and restores errno afterwards.
 */
static void bgwriter_sigusr1_handler(SIGNAL_ARGS)
{
    const int saved_errno = errno;

    latch_sigusr1_handler();

    errno = saved_errno;
}
/*
 * Report whether the calling thread is the background writer, i.e. whether
 * its role is BGWRITER. A thread whose role is SPBGWRITER (set in
 * invalid_buffer_bgwriter_main) is not counted.
 */
bool IsBgwriterProcess(void)
{
    if (t_thrd.role == BGWRITER) {
        return true;
    }
    return false;
}
/*
 * bgwriter view function: "node_name" column.
 *
 * Returns the configured PGXCNodeName as a text datum, or the literal
 * "not define" when the name is NULL or empty.
 */
Datum bgwriter_view_get_node_name()
{
    const char *node_name = g_instance.attr.attr_common.PGXCNodeName;

    if (node_name != NULL && node_name[0] != '\0') {
        return CStringGetTextDatum(node_name);
    }

    return CStringGetTextDatum("not define");
}
/*
 * bgwriter view function: "bgwr_actual_flush_total_num" column (INT8).
 * Always reports 0; no counter is read here.
 */
Datum bgwriter_view_get_actual_flush_num()
{
return Int64GetDatum(0);
}
/*
 * bgwriter view function: "bgwr_last_flush_num" column (INT4).
 * Always reports 0; no counter is read here.
 */
Datum bgwriter_view_get_last_flush_num()
{
return Int32GetDatum(0);
}
/*
 * bgwriter view function: "candidate_slots" column (INT4).
 *
 * Sums get_curr_candidate_nums() over the NORMAL, NVM and SEG candidate
 * lists and returns the total.
 */
Datum bgwriter_view_get_candidate_nums()
{
    int total = get_curr_candidate_nums(CAND_LIST_NORMAL);

    total += get_curr_candidate_nums(CAND_LIST_NVM);
    total += get_curr_candidate_nums(CAND_LIST_SEG);

    return Int32GetDatum(total);
}
/*
 * bgwriter view function: "get_buffer_from_list" column (INT8).
 * Returns the current value of ckpt_cxt_ctl->get_buf_num_candidate_list.
 */
Datum bgwriter_view_get_num_candidate_list()
{
return Int64GetDatum(g_instance.ckpt_cxt_ctl->get_buf_num_candidate_list);
}
/*
 * bgwriter view function: "get_buf_clock_sweep" column (INT8).
 * Returns the current value of ckpt_cxt_ctl->get_buf_num_clock_sweep.
 */
Datum bgwriter_view_get_num_clock_sweep()
{
return Int64GetDatum(g_instance.ckpt_cxt_ctl->get_buf_num_clock_sweep);
}
/*
 * Column descriptors for the bgwriter view. Each entry gives the column
 * name, its type OID, and the function that produces the column's value.
 * The type OID of every entry matches the Datum constructor used by its
 * getter (TEXT / INT8 / INT4).
 */
const incre_ckpt_view_col g_bgwriter_view_col[INCRE_CKPT_BGWRITER_VIEW_COL_NUM] = {
{"node_name", TEXTOID, bgwriter_view_get_node_name},
{"bgwr_actual_flush_total_num", INT8OID, bgwriter_view_get_actual_flush_num},
{"bgwr_last_flush_num", INT4OID, bgwriter_view_get_last_flush_num},
{"candidate_slots", INT4OID, bgwriter_view_get_candidate_nums},
{"get_buffer_from_list", INT8OID, bgwriter_view_get_num_candidate_list},
{"get_buf_clock_sweep", INT8OID, bgwriter_view_get_num_clock_sweep}};
/*
 * Latch-wait timeout used by invalid_buffer_bgwriter_main; the value
 * evaluates to 600000 (presumably milliseconds, i.e. ten minutes, matching
 * the WaitLatch timeout unit used elsewhere in this file -- confirm).
 */
const uint THREAD_SLEEP_TIME = 10 * 60 * 1000;
/*
 * Main entry point of the "invalidate buffer" bgwriter thread (role
 * SPBGWRITER).
 *
 * Outline: install the shared bgwriter signal handlers, create a resource
 * owner and a private memory context, arm the sigsetjmp() error-recovery
 * point, publish this thread's latch in
 * g_instance.bgwriter_cxt.invalid_buf_proc_latch, then loop: handle pending
 * SIGHUP/shutdown flags, wait on the latch (or time out), and run
 * drop_rel_all_forks_buffers() and drop_rel_one_fork_buffers().
 *
 * The loop contains no break; the function is left only through
 * proc_exit(0) on a shutdown request or gs_thread_exit(1) on postmaster
 * death.
 */
void invalid_buffer_bgwriter_main()
{
sigjmp_buf localSigjmpBuf;
MemoryContext bgwriter_context;
char name[MAX_THREAD_NAME_LEN] = {0};
WritebackContext wb_context;
t_thrd.role = SPBGWRITER;
setup_bgwriter_signalhook();
ereport(LOG, (errmsg("invalidate buffer bgwriter started")));
/* Build the name used for both the resource owner and the memory context */
errno_t err_rc = snprintf_s(name, MAX_THREAD_NAME_LEN, MAX_THREAD_NAME_LEN - 1, "%s", "spbgwriter");
securec_check_ss(err_rc, "", "");
/* Create a resource owner to keep track of our resources (currently only buffer pins). */
t_thrd.utils_cxt.CurrentResourceOwner = ResourceOwnerCreate(NULL, name,
THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE));
/*
 * Create a memory context that we will do all our work in. We do this so
 * that we can reset the context during error recovery and thereby avoid
 * possible memory leaks. Formerly this code just ran in
 * t_thrd.top_mem_cxt, but resetting that would be a really bad idea.
 */
bgwriter_context = AllocSetContextCreate(t_thrd.top_mem_cxt, name, ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(bgwriter_context);
WritebackContextInit(&wb_context, &u_sess->attr.attr_storage.bgwriter_flush_after);
/*
 * If an exception is encountered, processing resumes here: emit a warning,
 * run the shared cleanup, then fall through and re-enter the main loop.
 */
if (sigsetjmp(localSigjmpBuf, 1) != 0) {
ereport(WARNING, (errmsg("invalidate buffer bgwriter exception occured.")));
bgwriter_handle_exceptions(&wb_context, bgwriter_context);
}
/* We can now handle ereport(ERROR) */
t_thrd.log_cxt.PG_exception_stack = &localSigjmpBuf;
/* Unblock signals (they were blocked when the postmaster forked us) */
gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL);
(void)gs_signal_unblock_sigusr2();
/* Use the recovery target timeline ID during recovery */
if (RecoveryInProgress()) {
t_thrd.xlog_cxt.ThisTimeLineID = GetRecoveryTargetTLI();
}
pgstat_report_appname("InvalidBufferBgWriter");
pgstat_report_activity(STATE_IDLE, NULL);
/* Publish our latch in the instance-level bgwriter context */
g_instance.bgwriter_cxt.invalid_buf_proc_latch = &t_thrd.proc->procLatch;
/* Loop forever */
for (;;) {
int rc;
/* Flag set by bgwriter_sighup_handler: re-read the configuration file */
if (t_thrd.bgwriter_cxt.got_SIGHUP) {
t_thrd.bgwriter_cxt.got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
}
/* Flag set by bgwriter_request_shutdown_handler (SIGTERM) */
if (t_thrd.bgwriter_cxt.shutdown_requested) {
ereport(LOG, (errmsg("invalidate buffer bgwriter thread shut down")));
u_sess->attr.attr_common.ExitOnAnyError = true;
proc_exit(0);
}
/* Sleep until the latch is set, THREAD_SLEEP_TIME elapses, or the postmaster dies */
rc = WaitLatch(&t_thrd.proc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, THREAD_SLEEP_TIME);
if (rc & WL_POSTMASTER_DEATH) {
gs_thread_exit(1);
}
/*
 * Clear any already-pending wakeups. The reset happens before the work
 * below, so a latch set while the work runs makes the next WaitLatch
 * return without sleeping.
 */
ResetLatch(&t_thrd.proc->procLatch);
drop_rel_all_forks_buffers();
drop_rel_one_fork_buffers();
}
}
/* Initial element count passed when creating the unlink hash tables. */
const int HASH_TABLE_ELEMENT_MIN_NUM = 512;
/*
 * Create a hash table keyed by RelFileNode whose entries are DelFileTag.
 *
 * name:         table name handed to the hash implementation.
 * use_heap_mem: when true the table is built with HeapMemInitHash(), sized up
 *               to Max(max_files_per_process, max_userdatafiles), and a NULL
 *               result is reported as FATAL; when false the table is built
 *               with hash_create() in CurrentMemoryContext.
 *
 * Returns the new table.
 */
HTAB *relfilenode_hashtbl_create(const char *name, bool use_heap_mem)
{
    HASHCTL ctl;
    errno_t ret = memset_s(&ctl, sizeof(ctl), 0, sizeof(ctl));
    securec_check(ret, "", "");

    ctl.hcxt = (MemoryContext)CurrentMemoryContext;
    ctl.hash = tag_hash;
    ctl.keysize = sizeof(RelFileNode);
    /* keep entrysize >= keysize, stupid limits */
    ctl.entrysize = sizeof(DelFileTag);

    if (!use_heap_mem) {
        return hash_create(name, HASH_TABLE_ELEMENT_MIN_NUM, &ctl, (HASH_CONTEXT | HASH_FUNCTION | HASH_ELEM));
    }

    HTAB *table = HeapMemInitHash(name, HASH_TABLE_ELEMENT_MIN_NUM,
        Max(g_instance.attr.attr_common.max_files_per_process, t_thrd.storage_cxt.max_userdatafiles), &ctl,
        (HASH_FUNCTION | HASH_ELEM));
    if (table == NULL) {
        ereport(FATAL, (errmsg("could not initialize unlinik relation hash table")));
    }
    return table;
}
/*
 * Create a hash table keyed by ForkRelFileNode whose entries are
 * DelForkFileTag.
 *
 * name:         table name handed to the hash implementation.
 * use_heap_mem: when true the table is built with HeapMemInitHash(), sized up
 *               to Max(max_files_per_process, max_userdatafiles), and a NULL
 *               result is reported as FATAL; when false the table is built
 *               with hash_create() in CurrentMemoryContext.
 *
 * Returns the new table.
 */
HTAB *relfilenode_fork_hashtbl_create(const char* name, bool use_heap_mem)
{
    HASHCTL ctl;
    errno_t ret = memset_s(&ctl, sizeof(ctl), 0, sizeof(ctl));
    securec_check(ret, "", "");

    ctl.hcxt = (MemoryContext)CurrentMemoryContext;
    ctl.hash = tag_hash;
    ctl.keysize = sizeof(ForkRelFileNode);
    /* keep entrysize >= keysize, stupid limits */
    ctl.entrysize = sizeof(DelForkFileTag);

    if (!use_heap_mem) {
        return hash_create(name, HASH_TABLE_ELEMENT_MIN_NUM, &ctl, (HASH_CONTEXT | HASH_FUNCTION | HASH_ELEM));
    }

    HTAB *table = HeapMemInitHash(name, HASH_TABLE_ELEMENT_MIN_NUM,
        Max(g_instance.attr.attr_common.max_files_per_process, t_thrd.storage_cxt.max_userdatafiles),
        &ctl, (HASH_FUNCTION | HASH_ELEM));
    if (table == NULL) {
        ereport(FATAL, (errmsg("could not initialize unlinik relation hash table")));
    }
    return table;
}
void drop_rel_all_forks_buffers()
{
HASH_SEQ_STATUS status;
DelFileTag *entry = NULL;
DelFileTag *temp_entry = NULL;
bool found = false;
uint rel_num = 0;
HTAB *unlink_rel_hashtbl = g_instance.bgwriter_cxt.unlink_rel_hashtbl;
HTAB *rel_bak = relfilenode_hashtbl_create("unlink_rel_bak", false);
/* Obtains the entry in hashtable. */
LWLockAcquire(g_instance.bgwriter_cxt.rel_hashtbl_lock, LW_SHARED);
hash_seq_init(&status, unlink_rel_hashtbl);
while ((temp_entry = (DelFileTag *)hash_seq_search(&status)) != NULL) {
entry = (DelFileTag*)hash_search(rel_bak, (void *)&temp_entry->rnode, HASH_ENTER, &found);
if (!found) {
entry->rnode = temp_entry->rnode;
entry->maxSegNo = temp_entry->maxSegNo;
rel_num++;
}
}
LWLockRelease(g_instance.bgwriter_cxt.rel_hashtbl_lock);
if (rel_num > 0) {
DropRelFileNodeAllBuffersUsingHash(rel_bak);
hash_seq_init(&status, rel_bak);
while ((temp_entry = (DelFileTag *)hash_seq_search(&status)) != NULL) {
if (temp_entry->maxSegNo == -1) {
ereport(DEBUG1, (errmodule(MOD_INCRE_BG),
errmsg("the max segno is -1, skip forget this rel %u/%u/%u, bucketNode is %d",
temp_entry->rnode.spcNode, temp_entry->rnode.dbNode, temp_entry->rnode.relNode,
temp_entry->rnode.bucketNode)));
continue;
}
for (int32 i = 0; i < temp_entry->maxSegNo; i++) {
for (int fork_num = 0; fork_num <= (int)MAX_FORKNUM; fork_num++) {
md_register_forget_request(temp_entry->rnode, fork_num, i);
}
}
LWLockAcquire(g_instance.bgwriter_cxt.rel_hashtbl_lock, LW_EXCLUSIVE);
if (hash_search(unlink_rel_hashtbl, (void *)&temp_entry->rnode, HASH_REMOVE, NULL) == NULL) {
ereport(DEBUG1, (errmodule(MOD_INCRE_BG),
errmsg("rel %u/%u/%u, bucketNode is %d has already been invalidated",
temp_entry->rnode.spcNode, temp_entry->rnode.dbNode, temp_entry->rnode.relNode,
temp_entry->rnode.bucketNode)));
} else {
ereport(DEBUG1, (errmodule(MOD_INCRE_BG),
errmsg("invalidate buffer has been finished for rel %u/%u/%u, bucketNode is %d",
temp_entry->rnode.spcNode, temp_entry->rnode.dbNode, temp_entry->rnode.relNode,
temp_entry->rnode.bucketNode)));
}
LWLockRelease(g_instance.bgwriter_cxt.rel_hashtbl_lock);
}
}
hash_destroy(rel_bak);
}
/*
 * Invalidate the buffers of every single relation fork queued in
 * g_instance.bgwriter_cxt.unlink_rel_fork_hashtbl.
 *
 * 1. Under rel_one_fork_hashtbl_lock (shared), copy the queued entries into
 *    a private table so the lock is not held during the buffer scan.
 * 2. Call DropRelFileNodeOneForkAllBuffersUsingHash() on the private copy.
 * 3. For each copied entry with a known maxSegNo, register forget requests
 *    for segments [0, maxSegNo) of that fork, then remove the entry from the
 *    shared table under the exclusive lock.
 *
 * Entries whose maxSegNo is -1 are skipped in step 3 and therefore stay in
 * the shared table.
 *
 * The two completion messages are routine progress reports, so they are
 * tagged with errmodule(MOD_INCRE_BG) like the other messages of this module
 * rather than with the ERRCODE_DATA_CORRUPTED SQLSTATE. Their level (LOG) and
 * text are unchanged.
 */
void drop_rel_one_fork_buffers()
{
    HASH_SEQ_STATUS status;
    DelForkFileTag *entry = NULL;
    DelForkFileTag *temp_entry = NULL;
    bool found = false;
    uint rel_num = 0;
    HTAB *unlink_rel_fork_hashtbl = g_instance.bgwriter_cxt.unlink_rel_fork_hashtbl;
    HTAB *rel_bak = relfilenode_fork_hashtbl_create("unlink_rel_one_fork_bak", false);

    /* Obtains the entry in hashtable. */
    LWLockAcquire(g_instance.bgwriter_cxt.rel_one_fork_hashtbl_lock, LW_SHARED);
    hash_seq_init(&status, unlink_rel_fork_hashtbl);
    while ((temp_entry = (DelForkFileTag *)hash_seq_search(&status)) != NULL) {
        /*
         * The entry pointer itself is passed as the key; presumably forkrnode
         * is the first member of DelForkFileTag -- confirm against its
         * declaration.
         */
        entry = (DelForkFileTag*)hash_search(rel_bak, temp_entry, HASH_ENTER, &found);
        if (!found) {
            entry->forkrnode.rnode.spcNode = temp_entry->forkrnode.rnode.spcNode;
            entry->forkrnode.rnode.dbNode = temp_entry->forkrnode.rnode.dbNode;
            entry->forkrnode.rnode.relNode = temp_entry->forkrnode.rnode.relNode;
            entry->forkrnode.rnode.bucketNode = temp_entry->forkrnode.rnode.bucketNode;
            entry->forkrnode.forkNum = temp_entry->forkrnode.forkNum;
            entry->maxSegNo = temp_entry->maxSegNo;
            rel_num++;
        }
    }
    LWLockRelease(g_instance.bgwriter_cxt.rel_one_fork_hashtbl_lock);

    if (rel_num > 0) {
        DropRelFileNodeOneForkAllBuffersUsingHash(rel_bak);

        hash_seq_init(&status, rel_bak);
        while ((temp_entry = (DelForkFileTag *)hash_seq_search(&status)) != NULL) {
            if (temp_entry->maxSegNo == -1) {
                ereport(DEBUG1, (errmodule(MOD_INCRE_BG),
                    errmsg("the max segno is -1, skip forget this rel %u/%u/%u, bucketNode is %d",
                    temp_entry->forkrnode.rnode.spcNode, temp_entry->forkrnode.rnode.dbNode,
                    temp_entry->forkrnode.rnode.relNode, temp_entry->forkrnode.rnode.bucketNode)));
                continue;
            }

            for (int32 i = 0; i < temp_entry->maxSegNo; i++) {
                md_register_forget_request(temp_entry->forkrnode.rnode, temp_entry->forkrnode.forkNum, i);
            }

            /* Remove the processed fork from the shared queue. */
            LWLockAcquire(g_instance.bgwriter_cxt.rel_one_fork_hashtbl_lock, LW_EXCLUSIVE);
            if (hash_search(unlink_rel_fork_hashtbl, (void *)temp_entry, HASH_REMOVE, NULL) == NULL) {
                ereport(LOG, (errmodule(MOD_INCRE_BG),
                    errmsg("%u/%u/%u, bucketNode is %d, forkNum is %d has been invalidated",
                    temp_entry->forkrnode.rnode.spcNode, temp_entry->forkrnode.rnode.dbNode,
                    temp_entry->forkrnode.rnode.relNode, temp_entry->forkrnode.rnode.bucketNode,
                    temp_entry->forkrnode.forkNum)));
            } else {
                ereport(LOG, (errmodule(MOD_INCRE_BG), errmsg("invaslidate buffer has been finished for rel "
                    "%u/%u/%u, bucketNode is %d, forkNum is %d",
                    temp_entry->forkrnode.rnode.spcNode, temp_entry->forkrnode.rnode.dbNode,
                    temp_entry->forkrnode.rnode.relNode, temp_entry->forkrnode.rnode.bucketNode,
                    temp_entry->forkrnode.forkNum)));
            }
            LWLockRelease(g_instance.bgwriter_cxt.rel_one_fork_hashtbl_lock);
        }
    }

    hash_destroy(rel_bak);
}