From e40c175b5220140bc50a3047910331bf1d28221e Mon Sep 17 00:00:00 2001 From: mry <1312348150@qq.com> Date: Wed, 1 Dec 2021 15:25:31 +0800 Subject: [PATCH] commit for keep_sync_window --- src/bin/gs_guc/cluster_guc.conf | 1 + src/bin/gs_guc/pg_guc.cpp | 11 +++++++ .../backend/utils/misc/guc/guc_storage.cpp | 14 ++++++++ .../storage/replication/syncrep.cpp | 33 ++++++++++++++++--- .../storage/replication/walsender.cpp | 1 + .../knl/knl_guc/knl_session_attr_storage.h | 1 + src/include/replication/walsender_private.h | 3 +- 7 files changed, 59 insertions(+), 5 deletions(-) diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf index bde35ced6..bae921d5a 100755 --- a/src/bin/gs_guc/cluster_guc.conf +++ b/src/bin/gs_guc/cluster_guc.conf @@ -645,6 +645,7 @@ default_index_kind|int|0,2|NULL|NULL| undo_zone_count|int|0,1048576|NULL|NULL| enable_auto_clean_unique_sql|bool|0,0|NULL|NULL| max_logical_replication_workers|int|0,262143|NULL|Maximum number of logical replication worker processes.| +keep_sync_window|int|0,2147483647|s|NULL| [cmserver] log_dir|string|0,0|NULL|NULL| log_file_size|int|0,2047|MB|NULL| diff --git a/src/bin/gs_guc/pg_guc.cpp b/src/bin/gs_guc/pg_guc.cpp index 401d32c5b..6f1944b65 100644 --- a/src/bin/gs_guc/pg_guc.cpp +++ b/src/bin/gs_guc/pg_guc.cpp @@ -1188,6 +1188,16 @@ static void CheckLastValidReplconninfo(char** opt_lines, int idx) "the host role will be changed to Normal if the local_role is primary now.\n"); } } + +static void CheckKeepSyncWindow(char** opt_lines, int idx) +{ + /* Give a warning if keep_sync_window is set */ + if (strcmp(config_param[idx], "keep_sync_window") == 0) { + write_stderr("\nWARNING: If the primary server fails during keep_sync_window, the transactions which " + "were blocked during keep_sync_window will be lost and can't get recovered. This will affect RPO.\n"); + } +} + #endif /* @@ -1285,6 +1295,7 @@ do_gucset(const char *action_type, const char *data_dir) #ifndef ENABLE_MULTIPLE_NODES CheckLastValidReplconninfo(opt_lines, i); + CheckKeepSyncWindow(opt_lines, i); #endif /* find the line where guc parameter in */ diff --git a/src/common/backend/utils/misc/guc/guc_storage.cpp b/src/common/backend/utils/misc/guc/guc_storage.cpp index 35b388d3a..45c79e2a0 100755 --- a/src/common/backend/utils/misc/guc/guc_storage.cpp +++ b/src/common/backend/utils/misc/guc/guc_storage.cpp @@ -2876,6 +2876,20 @@ static void InitStorageConfigureNamesInt() NULL, NULL, NULL}, + {{"keep_sync_window", + PGC_SIGHUP, + NODE_SINGLENODE, + REPLICATION_MASTER, + gettext_noop("The length of keep sync window."), + NULL, + GUC_UNIT_S}, + &u_sess->attr.attr_storage.keep_sync_window, + 0, + 0, + INT_MAX, + NULL, + NULL, + NULL}, {{"max_concurrent_autonomous_transactions", PGC_POSTMASTER, NODE_ALL, diff --git a/src/gausskernel/storage/replication/syncrep.cpp b/src/gausskernel/storage/replication/syncrep.cpp index df592d8e4..de4485198 100755 --- a/src/gausskernel/storage/replication/syncrep.cpp +++ b/src/gausskernel/storage/replication/syncrep.cpp @@ -155,6 +155,9 @@ void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN, bool enableHandleCancel) char *new_status = NULL; const char *old_status = NULL; int mode = u_sess->attr.attr_storage.sync_rep_wait_mode; + TimestampTz now_time; + TimestampTz now_time_dynamical; + bool sync_master_standalone_window = false; /* * Fast exit if user has not requested sync replication, or there are no @@ -182,9 +185,21 @@ void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN, bool enableHandleCancel) * condition but we'll be fetching that cache line anyway so its likely to * be a low cost check. We don't wait for sync rep if no sync standbys alive */ + now_time = GetCurrentTimestamp(); + long diff_sec; + int diff_microsec; + + if (t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone) { + TimestampDifference(t_thrd.walsender_cxt.WalSndCtl->last_sync_master_standalone_time, now_time, &diff_sec, + &diff_microsec); + if(diff_sec >= (long)u_sess->attr.attr_storage.keep_sync_window) { + sync_master_standalone_window = true; + } + } + if (!t_thrd.walsender_cxt.WalSndCtl->sync_standbys_defined || XLByteLE(XactCommitLSN, t_thrd.walsender_cxt.WalSndCtl->lsn[mode]) || - t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone || + sync_master_standalone_window || !SynRepWaitCatchup(XactCommitLSN)) { LWLockRelease(SyncRepLock); RESUME_INTERRUPTS(); @@ -312,8 +327,17 @@ void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN, bool enableHandleCancel) /* * If we modify the syncmode dynamically, we'll stop wait */ - if (t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone || - synchronous_commit <= SYNCHRONOUS_COMMIT_LOCAL_FLUSH) { + now_time_dynamical = GetCurrentTimestamp(); + long diff_sec_dynamical; + int diff_microsec_dynamical; + if (t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone) { + TimestampDifference(t_thrd.walsender_cxt.WalSndCtl->last_sync_master_standalone_time, now_time_dynamical, + &diff_sec_dynamical, &diff_microsec_dynamical); + } + if (sync_master_standalone_window || + synchronous_commit <= SYNCHRONOUS_COMMIT_LOCAL_FLUSH || + (t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone && + diff_sec_dynamical >= (long)u_sess->attr.attr_storage.keep_sync_window)) { ereport(WARNING, (errmsg("canceling wait for synchronous replication due to syncmaster standalone."), errdetail("The transaction has already committed locally, but might not have been replicated to " @@ -1364,10 +1388,11 @@ void SyncRepCheckSyncStandbyAlive(void) if (!sync_standby_alive && !t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone && t_thrd.walsender_cxt.WalSndCtl->most_available_sync) { + ereport(LOG, (errmsg("synchronous master is now standalone"))); t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone = true; - + t_thrd.walsender_cxt.WalSndCtl->last_sync_master_standalone_time = GetCurrentTimestamp(); /* * If there is any waiting sender, then wake-up them as * master has switched to standalone mode diff --git a/src/gausskernel/storage/replication/walsender.cpp b/src/gausskernel/storage/replication/walsender.cpp index 1cbe433ae..5355b3bc9 100755 --- a/src/gausskernel/storage/replication/walsender.cpp +++ b/src/gausskernel/storage/replication/walsender.cpp @@ -4349,6 +4349,7 @@ void WalSndShmemInit(void) } t_thrd.walsender_cxt.WalSndCtl->most_available_sync = false; t_thrd.walsender_cxt.WalSndCtl->sync_master_standalone = false; + t_thrd.walsender_cxt.WalSndCtl->last_sync_master_standalone_time = 0; t_thrd.walsender_cxt.WalSndCtl->demotion = NoDemote; SpinLockInit(&t_thrd.walsender_cxt.WalSndCtl->mutex); } diff --git a/src/include/knl/knl_guc/knl_session_attr_storage.h b/src/include/knl/knl_guc/knl_session_attr_storage.h index 33fef53a5..23b9181d1 100755 --- a/src/include/knl/knl_guc/knl_session_attr_storage.h +++ b/src/include/knl/knl_guc/knl_session_attr_storage.h @@ -91,6 +91,7 @@ typedef struct knl_session_attr_storage { bool auto_explain_log_verbose; bool enable_candidate_buf_usage_count; bool enable_ustore_partial_seqscan; + int keep_sync_window; int wait_dummy_time; int DeadlockTimeout; int LockWaitTimeout; diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h index eb88fcc3e..7d1ba89c5 100644 --- a/src/include/replication/walsender_private.h +++ b/src/include/replication/walsender_private.h @@ -181,7 +181,8 @@ typedef struct WalSndCtlData { * mode. */ bool sync_master_standalone; - + TimestampTz last_sync_master_standalone_time; + /* * The demotion of postmaster Also indicates that all the walsenders * should reject any demote requests if postmaster is doning domotion.