[CP] fix sys ls report

This commit is contained in:
maosy
2024-02-09 22:24:17 +00:00
committed by ob-robot
parent 19d5e20cf1
commit b7ca6dc238
2 changed files with 96 additions and 56 deletions

View File

@ -146,9 +146,7 @@ void ObRecoveryLSService::do_work()
ret = OB_NOT_INIT;
LOG_WARN("not init", K(ret), K(inited_), KP(proxy_));
} else {
ObLSRecoveryStatOperator ls_recovery;
palf::PalfBufferIterator iterator;//can not use without palf_guard
int tmp_ret = OB_SUCCESS;
int64_t idle_time_us = 100 * 1000L;
SCN start_scn;
last_report_ts_ = OB_INVALID_TIMESTAMP;
@ -174,31 +172,65 @@ void ObRecoveryLSService::do_work()
restore_status_.reset();
} else if (0 == thread_idx) {
idle_time_us = 10 * 1000 * 1000L;
if (OB_FAIL(process_thread0_(tenant_info))) {
LOG_WARN("failed to process thread0", KR(ret));
}
} else {
if (OB_FAIL(process_thread1_(tenant_info, start_scn, iterator))) {
LOG_WARN("failed to process thread1", KR(ret));
}
}//end thread1
LOG_INFO("[LS_RECOVERY] finish one round", KR(ret), K(idle_time_us),
K(start_scn), K(thread_idx), K(tenant_info), K_(restore_status));
idle(idle_time_us);
ret = OB_SUCCESS;
}//end while
restore_status_.reset();
}
}
int ObRecoveryLSService::process_thread0_(const ObAllTenantInfo &tenant_info)
{
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
DEBUG_SYNC(STOP_RECOVERY_LS_THREAD0);
//adjust primary zone and balance ls group
if (OB_TMP_FAIL(do_standby_balance_())) {
ret = OB_SUCC(ret) ? tmp_ret : ret;
LOG_WARN("do standby balance", KR(ret), KR(tmp_ret));
}
if (OB_TMP_FAIL(do_ls_balance_task_())) {
ret = OB_SUCC(ret) ? tmp_ret : ret;
LOG_WARN("failed to process ls balance task", KR(ret), KR(tmp_ret));
}
(void)try_tenant_upgrade_end_();
if (tenant_info.is_standby()) {
if (REACH_TENANT_TIME_INTERVAL(10 * 1000 * 1000)) {
(void)try_update_primary_ip_list();
}
}
} else {
return ret;
}
int ObRecoveryLSService::process_thread1_(const ObAllTenantInfo &tenant_info,
share::SCN &start_scn,
palf::PalfBufferIterator &iterator)
{
int ret = OB_SUCCESS;
DEBUG_SYNC(STOP_RECOVERY_LS_THREAD1);
if (OB_UNLIKELY(!tenant_info.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("tenant info is invalid", KR(ret), K(tenant_info));
} else if (OB_UNLIKELY(!inited_) || OB_ISNULL(proxy_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", K(ret), K(inited_), KP(proxy_));
} else {
palf::PalfHandleGuard palf_handle_guard;
ObLSRecoveryStat ls_recovery_stat;
ObLSRecoveryStatOperator ls_recovery;
if (OB_FAIL(init_palf_handle_guard_(palf_handle_guard))) {
LOG_WARN("failed to init palf handle guard", KR(ret));
} else if (!start_scn.is_valid()) {
ObLSRecoveryStat ls_recovery_stat;
if (OB_FAIL(ls_recovery.get_ls_recovery_stat(tenant_id_,
SYS_LS, false, ls_recovery_stat, *proxy_))) {
LOG_WARN("failed to load sys recovery stat", KR(ret), K(tenant_id_));
@ -227,14 +259,8 @@ void ObRecoveryLSService::do_work()
if (OB_FAIL(ret)) {
start_scn.reset();
}
}//end thread1
LOG_INFO("[LS_RECOVERY] finish one round", KR(ret), KR(tmp_ret),
K(start_scn), K(thread_idx), K(tenant_info), K(idle_time_us), K_(restore_status));
idle(idle_time_us);
ret = OB_SUCCESS;
}//end while
restore_status_.reset();
}
return ret;
}
int ObRecoveryLSService::init_palf_handle_guard_(palf::PalfHandleGuard &palf_handle_guard)
@ -326,6 +352,8 @@ int ObRecoveryLSService::process_ls_log_(
start_scn.reset();
restore_status_.reset(); // need to reset restore status if iterate to recovery end
}
//防止sync_scn - 1被汇报上去
sync_scn.reset();
} else if (OB_FAIL(header.deserialize(log_buf, HEADER_SIZE, log_pos))) {
LOG_WARN("failed to deserialize", KR(ret), K(HEADER_SIZE));
} else if (OB_UNLIKELY(log_pos >= log_length)) {
@ -353,15 +381,22 @@ int ObRecoveryLSService::process_ls_log_(
if (OB_SUCC(ret)) {
last_sync_scn = sync_scn;
} else if (sync_scn.is_valid()) {
//如果本条日志不可用,至少汇报sync_scn - 1,
//防止拉日志齐步走和系统日志流的汇报参考其他日志流出现的死锁问题
last_sync_scn = SCN::scn_dec(sync_scn);
}
if (last_sync_scn.is_valid() && (OB_FAIL(ret) || OB_INVALID_TIMESTAMP == last_report_ts_
|| ObTimeUtility::current_time() - last_report_ts_ > 100 * 1000)) {
//if ls_operator can not process, need to report last sync scn
int tmp_ret = OB_SUCCESS;
const char* comment = OB_SUCC(ret) ? "regular report when iterate log" :
"report sync_scn -1 when not valid to process";
if (OB_TMP_FAIL(report_sys_ls_recovery_stat_(last_sync_scn, false,
"regular report when iterate log"))) {
LOG_WARN("failed to report ls recovery stat", KR(ret), KR(tmp_ret), K(last_sync_scn));
comment))) {
LOG_WARN("failed to report ls recovery stat", KR(ret), KR(tmp_ret),
K(last_sync_scn), K(comment));
}
}
}

View File

@ -87,6 +87,10 @@ public:
virtual void do_work() override;
DEFINE_MTL_FUNC(ObRecoveryLSService)
private:
int process_thread0_(const ObAllTenantInfo &tenant_info);
int process_thread1_(const ObAllTenantInfo &tenant_info,
share::SCN &start_scn,
palf::PalfBufferIterator &iterator);
//get log iterator by start_scn
//interface for thread0
int init_palf_handle_guard_(palf::PalfHandleGuard &palf_handle_guard);
@ -150,6 +154,7 @@ private:
int do_update_restore_source_(ObRestoreSourceServiceAttr &old_attr, ObLogRestoreSourceMgr &restore_source_mgr);
int update_source_inner_table_(char *buf, const int64_t buf_size, ObMySQLTransaction &trans, const ObLogRestoreSourceItem &item);
int get_ls_all_replica_readable_scn_(const share::ObLSID &ls_id, share::SCN &reabable_scn);
private:
bool inited_;
uint64_t tenant_id_;