[FEAT MERGE] log4100 branch
Co-authored-by: tino247 <tino247@126.com> Co-authored-by: BinChenn <binchenn.bc@gmail.com> Co-authored-by: HaHaJeff <jeffzhouhhh@gmail.com>
This commit is contained in:
@ -65,6 +65,8 @@ int ObRecoveryLSService::init()
|
||||
} else if (OB_FAIL(ObTenantThreadHelper::create("RecLSSer",
|
||||
lib::TGDefIDs::LSService, *this))) {
|
||||
LOG_WARN("failed to create thread", KR(ret));
|
||||
} else if (OB_FAIL(ObTenantThreadHelper::start())) {
|
||||
LOG_WARN("failed to start thread", KR(ret));
|
||||
} else {
|
||||
tenant_id_ = MTL_ID();
|
||||
proxy_ = GCTX.sql_proxy_;
|
||||
@ -98,12 +100,19 @@ void ObRecoveryLSService::do_work()
|
||||
while (!has_set_stop()) {
|
||||
ObCurTraceId::init(GCONF.self_addr_);
|
||||
uint64_t thread_idx = get_thread_idx();
|
||||
ObTenantRecoveryReportor *tenant_report = MTL(ObTenantRecoveryReportor*);
|
||||
ObAllTenantInfo tenant_info;
|
||||
//two thread for seed log and recovery_ls_manager
|
||||
if (!is_user_tenant(tenant_id_)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("ls recovery thread must run on user tenant", KR(ret), K(tenant_id_));
|
||||
} else if (OB_FAIL(check_can_do_recovery_())) {
|
||||
LOG_WARN("failed to check do recovery", KR(ret));
|
||||
} else if (OB_ISNULL(tenant_report)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("tenant report is null", KR(ret), K(tenant_id_));
|
||||
} else if (OB_FAIL(tenant_report->get_tenant_info(tenant_info))) {
|
||||
LOG_WARN("failed to get tenant info", KR(ret));
|
||||
} else if (OB_FAIL(check_can_do_recovery_(tenant_info))) {
|
||||
LOG_WARN("failed to check do recovery", KR(ret), K(tenant_info));
|
||||
} else if (0 == thread_idx) {
|
||||
if (OB_SUCCESS != (tmp_ret = process_recovery_ls_manager())) {
|
||||
ret = OB_SUCC(ret) ? tmp_ret : ret;
|
||||
@ -122,6 +131,11 @@ void ObRecoveryLSService::do_work()
|
||||
} else if (OB_FAIL(report_sys_ls_recovery_stat_(ls_recovery_stat.get_sync_scn()))) {
|
||||
//may recovery end, but readable scn need report
|
||||
LOG_WARN("failed to report ls recovery stat", KR(ret), K(ls_recovery_stat));
|
||||
} else if (tenant_info.get_recovery_until_scn() == ls_recovery_stat.get_sync_scn()) {
|
||||
ret = OB_EAGAIN;
|
||||
if (REACH_TIME_INTERVAL(60 * 1000 * 1000)) { // every minute
|
||||
LOG_INFO("has recovered to recovery_until_scn", KR(ret), K(ls_recovery_stat), K(tenant_info));
|
||||
}
|
||||
} else if (OB_FAIL(seek_log_iterator_(ls_recovery_stat.get_sync_scn(), iterator))) {
|
||||
LOG_WARN("failed to seek log iterator", KR(ret), K(ls_recovery_stat));
|
||||
} else {
|
||||
@ -129,19 +143,21 @@ void ObRecoveryLSService::do_work()
|
||||
LOG_INFO("start to seek at", K(start_scn));
|
||||
}
|
||||
}
|
||||
if (FAILEDx(process_ls_log_(start_scn, iterator))) {
|
||||
LOG_WARN("failed to process ls log", KR(ret), K(start_scn));
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (start_scn.is_valid() && OB_FAIL(process_ls_log_(tenant_info, start_scn, iterator))) {
|
||||
if (OB_ITER_STOP != ret) {
|
||||
LOG_WARN("failed to process ls log", KR(ret), K(start_scn), K(tenant_info));
|
||||
}
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
start_scn.reset();
|
||||
LOG_WARN("failed to do wowrk", KR(ret), K(start_scn));
|
||||
}
|
||||
|
||||
if (REACH_TIME_INTERVAL(10 * 1000 * 1000)) { // every 10 second
|
||||
(void)try_tenant_upgrade_end_();
|
||||
}
|
||||
}
|
||||
LOG_INFO("[LS_RECOVERY] finish one round", KR(ret), KR(tmp_ret), K(start_scn), K(thread_idx));
|
||||
LOG_INFO("[LS_RECOVERY] finish one round", KR(ret), KR(tmp_ret), K(start_scn), K(thread_idx), K(tenant_info));
|
||||
idle(idle_time_us);
|
||||
}
|
||||
}
|
||||
@ -180,7 +196,10 @@ int ObRecoveryLSService::seek_log_iterator_(const SCN &sync_scn, PalfBufferItera
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObRecoveryLSService::process_ls_log_(const SCN &start_scn, PalfBufferIterator &iterator)
|
||||
int ObRecoveryLSService::process_ls_log_(
|
||||
const ObAllTenantInfo &tenant_info,
|
||||
SCN &start_scn,
|
||||
PalfBufferIterator &iterator)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
palf::LogEntry log_entry;
|
||||
@ -190,6 +209,9 @@ int ObRecoveryLSService::process_ls_log_(const SCN &start_scn, PalfBufferIterato
|
||||
if (OB_UNLIKELY(!inited_)) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not init", KR(ret), K(inited_));
|
||||
} else if (OB_UNLIKELY(!tenant_info.is_valid())) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("tenant_info is invalid", KR(ret), K(tenant_info));
|
||||
}
|
||||
while (OB_SUCC(ret) && OB_SUCC(iterator.next())) {
|
||||
if (OB_FAIL(iterator.get_entry(log_entry, target_lsn))) {
|
||||
@ -208,6 +230,20 @@ int ObRecoveryLSService::process_ls_log_(const SCN &start_scn, PalfBufferIterato
|
||||
if (OB_UNLIKELY(sync_scn <= start_scn)) {
|
||||
//通过scn定位的LSN是不准确的,可能会获取多余的数据,所以需要把小于等于sync_scn的过滤掉
|
||||
continue;
|
||||
} else if (tenant_info.get_recovery_until_scn() < sync_scn) {
|
||||
if (OB_FAIL(report_sys_ls_recovery_stat_(tenant_info.get_recovery_until_scn()))) {
|
||||
LOG_WARN("failed to report_sys_ls_recovery_stat_", KR(ret), K(sync_scn), K(tenant_info),
|
||||
K(log_entry), K(target_lsn), K(start_scn));
|
||||
// SYS LS has recovered to the recovery_until_scn, need stop iterate SYS LS log and reset start_scn
|
||||
} else if (OB_FAIL(seek_log_iterator_(tenant_info.get_recovery_until_scn(), iterator))) {
|
||||
LOG_WARN("failed to seek log iterator", KR(ret), K(sync_scn), K(tenant_info),
|
||||
K(log_entry), K(target_lsn), K(start_scn));
|
||||
} else {
|
||||
ret = OB_ITER_STOP;
|
||||
LOG_WARN("SYS LS has recovered to the recovery_until_scn, need stop iterate SYS LS log",
|
||||
KR(ret), K(sync_scn), K(tenant_info), K(log_entry), K(target_lsn), K(start_scn));
|
||||
start_scn.reset();
|
||||
}
|
||||
} else if (OB_FAIL(header.deserialize(log_buf, HEADER_SIZE, log_pos))) {
|
||||
LOG_WARN("failed to deserialize", KR(ret), K(HEADER_SIZE));
|
||||
} else if (OB_UNLIKELY(log_pos >= log_length)) {
|
||||
@ -249,9 +285,6 @@ int ObRecoveryLSService::process_ls_log_(const SCN &start_scn, PalfBufferIterato
|
||||
if (OB_FAIL(report_sys_ls_recovery_stat_(sync_scn))) {
|
||||
LOG_WARN("failed to report ls recovery stat", KR(ret), K(sync_scn));
|
||||
}
|
||||
if (FAILEDx(update_sys_ls_restore_finish_())) {
|
||||
LOG_WARN("failed to update sys ls recovery finish", KR(ret), K(sync_scn));
|
||||
}
|
||||
} else if (OB_SUCC(ret)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("iterator must be end", KR(ret));
|
||||
@ -484,7 +517,7 @@ int ObRecoveryLSService::process_gc_log_(logservice::ObGCLSLog &gc_log, const SC
|
||||
} else if (OB_FAIL(trans.start(proxy_, meta_tenant_id))) {
|
||||
LOG_WARN("failed to start trans", KR(ret), K(meta_tenant_id));
|
||||
} else if (OB_FAIL(ls_life_agent.set_ls_offline_in_trans(
|
||||
tenant_id_, SYS_LS, share::OB_LS_TENANT_DROPPING, sync_scn,
|
||||
tenant_id_, SYS_LS, share::OB_LS_TENANT_DROPPING, sync_scn, share::NORMAL_SWITCHOVER_STATUS,
|
||||
trans))) {
|
||||
LOG_WARN("failed to set offline", KR(ret), K(tenant_id_), K(sync_scn));
|
||||
} else {
|
||||
@ -625,12 +658,12 @@ int ObRecoveryLSService::process_ls_operator_in_trans_(
|
||||
LOG_WARN("failed to create new ls", KR(ret), K(sync_scn), K(ls_attr));
|
||||
}
|
||||
} else if (share::is_ls_create_abort_op(ls_attr.get_ls_operatin_type())) {
|
||||
if (OB_FAIL(ls_life_agent.drop_ls_in_trans(tenant_id_, ls_attr.get_ls_id(), trans))) {
|
||||
if (OB_FAIL(ls_life_agent.drop_ls_in_trans(tenant_id_, ls_attr.get_ls_id(), share::NORMAL_SWITCHOVER_STATUS, trans))) {
|
||||
LOG_WARN("failed to drop ls", KR(ret), K(tenant_id_), K(ls_attr));
|
||||
}
|
||||
} else if (share::is_ls_drop_end_op(ls_attr.get_ls_operatin_type())) {
|
||||
if (OB_FAIL(ls_life_agent.set_ls_offline_in_trans(tenant_id_, ls_attr.get_ls_id(),
|
||||
ls_attr.get_ls_status(), sync_scn, trans))) {
|
||||
ls_attr.get_ls_status(), sync_scn, share::NORMAL_SWITCHOVER_STATUS, trans))) {
|
||||
LOG_WARN("failed to set offline", KR(ret), K(tenant_id_), K(ls_attr), K(sync_scn));
|
||||
}
|
||||
} else {
|
||||
@ -735,19 +768,15 @@ int ObRecoveryLSService::process_recovery_ls_manager()
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObRecoveryLSService::check_can_do_recovery_()
|
||||
int ObRecoveryLSService::check_can_do_recovery_(const ObAllTenantInfo &tenant_info)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObTenantRecoveryReportor *tenant_report = MTL(ObTenantRecoveryReportor*);
|
||||
ObAllTenantInfo tenant_info;
|
||||
if (OB_UNLIKELY(!inited_)) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not init", K(ret), K(inited_));
|
||||
} else if (OB_ISNULL(tenant_report)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("tenant report is null", KR(ret), K(tenant_id_));
|
||||
} else if (OB_FAIL(tenant_report->get_tenant_info(tenant_info))) {
|
||||
LOG_WARN("failed to get tenant info", KR(ret));
|
||||
} else if (OB_UNLIKELY(!tenant_info.is_valid())) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("tenant_info is invalid", KR(ret), K(tenant_info));
|
||||
} else if (tenant_info.is_primary()) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("primary tenant can not do recovery", KR(ret), K(tenant_info));
|
||||
@ -801,73 +830,6 @@ int ObRecoveryLSService::report_sys_ls_recovery_stat_(const SCN &sync_scn)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObRecoveryLSService::update_sys_ls_restore_finish_()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_UNLIKELY(!inited_)) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("not init", KR(ret), K(inited_));
|
||||
} else if (OB_ISNULL(proxy_)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("sql can't null", K(ret), K(proxy_));
|
||||
} else {
|
||||
ObLSService *ls_svr = MTL(ObLSService *);
|
||||
ObLSHandle ls_handle;
|
||||
if (OB_ISNULL(ls_svr)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("ls service is null", KR(ret));
|
||||
} else if (OB_FAIL(ls_svr->get_ls(SYS_LS, ls_handle, storage::ObLSGetMod::RS_MOD))) {
|
||||
LOG_WARN("failed to get ls", KR(ret));
|
||||
} else {
|
||||
logservice::ObLogRestoreHandler *restore_handler = NULL;
|
||||
ObLS *ls = NULL;
|
||||
if (OB_ISNULL(ls = ls_handle.get_ls())) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_ERROR("ls is NULL", K(ret), K(ls));
|
||||
} else if (OB_ISNULL(restore_handler = ls->get_log_restore_handler())) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_ERROR("restore_handler is NULL", K(ret), K(restore_handler));
|
||||
} else {
|
||||
ObLSRecoveryStatOperator ls_recovery;
|
||||
ObLSRecoveryStat ls_recovery_stat;
|
||||
SCN max_ls_scn;
|
||||
SCN recovery_until_scn;
|
||||
if (OB_FAIL(ls_recovery.get_ls_recovery_stat(tenant_id_,
|
||||
SYS_LS, false, ls_recovery_stat, *proxy_))) {
|
||||
LOG_WARN("failed to load sys recovery stat", KR(ret), K(tenant_id_));
|
||||
} else if (OB_FAIL(restore_handler->get_max_restore_scn(max_ls_scn))) {
|
||||
LOG_WARN("failed to get max restore log ts", KR(ret));
|
||||
} else if (max_ls_scn == ls_recovery_stat.get_sync_scn()) {
|
||||
//restore finish, update sync scn to restore_unti_scn
|
||||
if (OB_FAIL(restore_handler->get_upper_limit_scn(recovery_until_scn))) {
|
||||
LOG_WARN("failed to get upper limit ts", KR(ret));
|
||||
} else if (OB_UNLIKELY(!recovery_until_scn.is_valid())) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("invalid recovery_until_scn", KR(ret));
|
||||
} else if (ls_recovery_stat.get_sync_scn() == recovery_until_scn) {
|
||||
//nothing
|
||||
} else if (ls_recovery_stat.get_sync_scn() > recovery_until_scn) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_ERROR("in restore status, sync scn can not larger than recovery until scn", KR(ret),
|
||||
K(recovery_until_scn), K(ls_recovery_stat), K(max_ls_scn));
|
||||
} else {
|
||||
ObLSRecoveryStat ls_recovery_stat_new;
|
||||
if (OB_FAIL(construct_ls_recovery_stat(recovery_until_scn, ls_recovery_stat_new))) {
|
||||
LOG_WARN("failed to construct ls recovery stat", KR(ret), K(recovery_until_scn));
|
||||
} else if (OB_FAIL(ls_recovery.update_ls_recovery_stat(
|
||||
ls_recovery_stat_new, *proxy_))) {
|
||||
LOG_WARN("failed to update ls recovery stat", KR(ret), K(ls_recovery_stat));
|
||||
} else {
|
||||
LOG_INFO("[RECOVERY LS] SYS LS RESTORE FINISH", K(recovery_until_scn), K(max_ls_scn), K(ls_recovery_stat));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
}//end of namespace rootserver
|
||||
}//end of namespace oceanbase
|
||||
|
||||
|
||||
Reference in New Issue
Block a user