fix check_snapshot_gc_scn in case of cluster restart
@@ -31,6 +31,7 @@
 #include "lib/utility/ob_tracepoint.h"
 #include "storage/tx/ob_ts_mgr.h"
 #include "storage/tx/wrs/ob_weak_read_util.h"
+#include "share/ob_server_table_operator.h"
 
 namespace oceanbase
 {
@@ -620,6 +621,8 @@ int ObFreezeInfoManager::check_snapshot_gc_scn()
   SCN cur_gts_scn;
   SCN snapshot_gc_scn;
   int64_t delay = 0;
+  int64_t start_service_time = -1;
+  int64_t total_service_time = -1;
 
   ObRecursiveMutexGuard guard(lock_);
   if (OB_FAIL(check_inner_stat())) {
@@ -638,8 +641,23 @@ int ObFreezeInfoManager::check_snapshot_gc_scn()
 
   if (TC_REACH_TIME_INTERVAL(60 * 1000 * 1000)) {
     if (delay > SNAPSHOT_GC_TS_ERROR) {
+      // To avoid LOG_ERROR when the tenant reloads an old snapshot_gc_scn because the
+      // cluster restarted, LOG_ERROR should satisfy two additional conditions:
+      // 1. start_service_time > 0. start_service_time is initialized to 0 when observer starts.
+      //    It is then updated to the observer start time through the heartbeat, which is
+      //    scheduled every 2 seconds.
+      // 2. total_service_time > SNAPSHOT_GC_TS_ERROR.
+      ObServerTableOperator st_operator;
+      if (OB_FAIL(st_operator.init(sql_proxy_))) {
+        LOG_WARN("fail to init server table operator", K(ret), K_(tenant_id));
+      } else if (OB_FAIL(st_operator.get_start_service_time(GCONF.self_addr_, start_service_time))) {
+        LOG_WARN("fail to get start service time", KR(ret), K_(tenant_id));
+      } else if (FALSE_IT(total_service_time = ObTimeUtility::current_time() - start_service_time)) {
+      } else if ((start_service_time > 0) && (total_service_time > SNAPSHOT_GC_TS_ERROR)) {
         LOG_ERROR("rs_monitor_check : snapshot_gc_ts delay for a long time",
-          K(snapshot_gc_time_us), K(delay), K_(tenant_id));
+          K(snapshot_gc_time_us), K(delay), K_(tenant_id), K(start_service_time),
+          K(total_service_time));
+      }
     } else if (delay > SNAPSHOT_GC_TS_WARN) {
       LOG_WARN("rs_monitor_check : snapshot_gc_ts delay for a long time",
         K(snapshot_gc_time_us), K(delay), K_(tenant_id));
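The effect of the new gate is easier to see outside the diff. Below is a minimal, self-contained sketch of the condition that now guards the LOG_ERROR; the helper name and the constant value are illustrative stand-ins, not the real OceanBase symbols.

#include <cstdint>

// Illustrative stand-in for the real constant defined in the OceanBase sources.
static const int64_t SNAPSHOT_GC_TS_ERROR_US = 30LL * 60 * 1000 * 1000;

// Mirrors the patched condition: report an error only when the delay is large
// AND the server has been in service longer than the error threshold, so a
// freshly restarted cluster that reloads an old snapshot_gc_scn stays quiet.
bool should_report_gc_scn_error(int64_t delay_us,
                                int64_t start_service_time_us,
                                int64_t now_us)
{
  const int64_t total_service_time_us = now_us - start_service_time_us;
  return delay_us > SNAPSHOT_GC_TS_ERROR_US
      && start_service_time_us > 0                          // heartbeat has reported a start time
      && total_service_time_us > SNAPSHOT_GC_TS_ERROR_US;   // server has run past the threshold
}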
@@ -44,7 +44,7 @@ public:
   share::SCN latest_snapshot_gc_scn_;
 
   ObFreezeInfo()
-    : frozen_statuses_(), latest_snapshot_gc_scn_()
+    : frozen_statuses_(), latest_snapshot_gc_scn_(share::SCN::min_scn())
   {}
   ~ObFreezeInfo() {}
 
@@ -786,7 +786,7 @@ void ObMajorMergeScheduler::check_merge_interval_time(const bool is_merging)
   int64_t global_merge_start_time = -1;
   int64_t max_merge_time = -1;
   int64_t start_service_time = -1;
-  int64_t all_service_time = -1;
+  int64_t total_service_time = -1;
   if (OB_ISNULL(zone_merge_mgr_)) {
     ret = OB_ERR_UNEXPECTED;
     LOG_WARN("zone_merge_mgr_ is unexpected nullptr", KR(ret), K_(tenant_id));
@@ -815,18 +815,23 @@ void ObMajorMergeScheduler::check_merge_interval_time(const bool is_merging)
     } else if (OB_FAIL(st_operator.get_start_service_time(GCONF.self_addr_, start_service_time))) {
       LOG_WARN("fail to get start service time", KR(ret), K_(tenant_id));
     } else {
-      all_service_time = now - start_service_time;
+      total_service_time = now - start_service_time;
     }
   }
-  // LOG_ERROR should satisfy one additional condition: all_service_time > MAX_NO_MERGE_INTERVAL.
-  // So as to avoid LOG_ERROR when the tenant miss daily merge due to the cluster restarted.
-  if (OB_SUCC(ret) && !is_paused() && (all_service_time > MAX_NO_MERGE_INTERVAL)) {
+  // To avoid LOG_ERROR when the tenant misses the daily merge because the cluster restarted,
+  // LOG_ERROR should satisfy two additional conditions:
+  // 1. start_service_time > 0. start_service_time is initialized to 0 when observer starts.
+  //    It is then updated to the observer start time through the heartbeat, which is
+  //    scheduled every 2 seconds.
+  // 2. total_service_time > MAX_NO_MERGE_INTERVAL.
+  if (OB_SUCC(ret) && !is_paused() && (start_service_time > 0)
+      && (total_service_time > MAX_NO_MERGE_INTERVAL)) {
     if (is_merging) {
       if ((now - max_merge_time) > MAX_NO_MERGE_INTERVAL) {
         if (TC_REACH_TIME_INTERVAL(30 * 60 * 1000 * 1000)) {
           LOG_ERROR("long time major freeze not finish, please check it", KR(ret),
             K(global_last_merged_time), K(global_merge_start_time), K(max_merge_time),
-            K(now), K_(tenant_id), K(is_merging), K(start_service_time), K(all_service_time));
+            K(now), K_(tenant_id), K(is_merging), K(start_service_time), K(total_service_time));
         }
       }
     } else {
@@ -840,7 +845,7 @@ void ObMajorMergeScheduler::check_merge_interval_time(const bool is_merging)
       if (TC_REACH_TIME_INTERVAL(30 * 60 * 1000 * 1000)) {
         LOG_ERROR("long time no major freeze, please check it", KR(ret),
           K(global_last_merged_time), K(global_merge_start_time), K(max_merge_time),
-          K(now), K_(tenant_id), K(is_merging), K(start_service_time), K(all_service_time));
+          K(now), K_(tenant_id), K(is_merging), K(start_service_time), K(total_service_time));
       }
     }
   }
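The same guard can be exercised with a tiny standalone simulation. This sketch is illustrative only: MAX_NO_MERGE_INTERVAL_US is assumed to be 36 hours here and the timestamps are made up; it simply shows that LOG_ERROR stays suppressed right after a restart and becomes possible once total_service_time exceeds the interval.

#include <cstdint>
#include <cstdio>

// Assumed value for illustration; the real constant lives in the OceanBase sources.
static const int64_t MAX_NO_MERGE_INTERVAL_US = 36LL * 3600 * 1000 * 1000;

int main()
{
  const int64_t start_service_time = 1000000;  // fake start time reported via heartbeat, in us
  const int64_t uptimes_us[] = {
      60LL * 1000 * 1000,             // 1 minute after restart
      48LL * 3600 * 1000 * 1000       // 48 hours after restart
  };

  for (int64_t uptime : uptimes_us) {
    const int64_t now = start_service_time + uptime;
    const int64_t total_service_time = now - start_service_time;
    const bool error_allowed =
        start_service_time > 0 && total_service_time > MAX_NO_MERGE_INTERVAL_US;
    std::printf("total_service_time=%lld us -> LOG_ERROR %s\n",
                static_cast<long long>(total_service_time),
                error_allowed ? "possible" : "suppressed");
  }
  return 0;
}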