Fix schedule loop that may hang when an ls is offline

This commit is contained in:
obdev
2023-05-09 03:38:39 +00:00
committed by ob-robot
parent e6af7509a0
commit 04ca63199b
2 changed files with 26 additions and 15 deletions

View File

@ -437,13 +437,11 @@ int ObTenantTabletScheduler::schedule_all_tablets_minor()
LOG_WARN("ls is null", K(ret), K(ls)); LOG_WARN("ls is null", K(ret), K(ls));
} else { } else {
const ObLSID &ls_id = ls->get_ls_id(); const ObLSID &ls_id = ls->get_ls_id();
bool need_merge = false; if (OB_TMP_FAIL(schedule_ls_minor_merge(ls_handle, schedule_tablet_cnt))) {
if (OB_FAIL(check_ls_state(*ls, need_merge))) { minor_ls_tablet_iter_.skip_cur_ls();
LOG_WARN("failed to check ls state", K(ret), K(ls_id)); if (!schedule_ignore_error(tmp_ret)) {
} else if (!need_merge) { LOG_WARN("failed to schedule ls minor merge", K(tmp_ret), K(ls_id));
// no need to merge, do nothing }
} else if (OB_TMP_FAIL(schedule_ls_minor_merge(ls_handle, schedule_tablet_cnt))) {
LOG_WARN("failed to schedule ls minor merge", K(tmp_ret), K(ls_id));
} }
} }
} }
@ -854,6 +852,7 @@ int ObTenantTabletScheduler::schedule_ls_minor_merge(
LOG_WARN("failed to check ls state", K(ret), K(ls)); LOG_WARN("failed to check ls state", K(ret), K(ls));
} else if (!need_merge) { } else if (!need_merge) {
// no need to merge, do nothing // no need to merge, do nothing
ret = OB_STATE_NOT_MATCH;
} else { } else {
ObTabletID tablet_id; ObTabletID tablet_id;
ObTabletHandle tablet_handle; ObTabletHandle tablet_handle;
@ -863,10 +862,10 @@ int ObTenantTabletScheduler::schedule_ls_minor_merge(
while (OB_SUCC(ret) && schedule_minor_flag && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) { // loop all tablet in ls while (OB_SUCC(ret) && schedule_minor_flag && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) { // loop all tablet in ls
bool tablet_merge_finish = false; bool tablet_merge_finish = false;
if (OB_FAIL(minor_ls_tablet_iter_.get_next_tablet(ls_handle, tablet_handle))) { if (OB_FAIL(minor_ls_tablet_iter_.get_next_tablet(ls_handle, tablet_handle))) {
if (OB_ITER_END == ret || OB_LS_NOT_EXIST == ret) { if (OB_ITER_END == ret) {
ret = OB_SUCCESS; ret = OB_SUCCESS;
break; break;
} else { } else if (OB_LS_NOT_EXIST != ret) {
LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_handle)); LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_handle));
} }
} else if (OB_UNLIKELY(!tablet_handle.is_valid())) { } else if (OB_UNLIKELY(!tablet_handle.is_valid())) {
@ -935,12 +934,14 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
LOG_WARN("failed to check ls state", K(ret), K(ls)); LOG_WARN("failed to check ls state", K(ret), K(ls));
} else if (!need_merge) { } else if (!need_merge) {
// no need to merge, do nothing // no need to merge, do nothing
ret = OB_STATE_NOT_MATCH;
} else if (OB_FAIL(ls.get_ls_meta().get_restore_status(restore_status))) { } else if (OB_FAIL(ls.get_ls_meta().get_restore_status(restore_status))) {
LOG_WARN("failed to get restore status", K(ret), K(ls)); LOG_WARN("failed to get restore status", K(ret), K(ls));
} else if (OB_UNLIKELY(!restore_status.is_restore_none())) { } else if (OB_UNLIKELY(!restore_status.is_restore_none())) {
if (REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) { if (REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) {
LOG_INFO("ls is in restore status, should not loop tablet to schedule", K(ret), K(ls)); LOG_INFO("ls is in restore status, should not loop tablet to schedule", K(ret), K(ls));
} }
ret = OB_STATE_NOT_MATCH;
} else { } else {
ObTabletID tablet_id; ObTabletID tablet_id;
ObTabletHandle tablet_handle; ObTabletHandle tablet_handle;
@ -977,10 +978,10 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
while (OB_SUCC(ret) && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) { // loop all tablet in ls while (OB_SUCC(ret) && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) { // loop all tablet in ls
bool tablet_merge_finish = false; bool tablet_merge_finish = false;
if (OB_FAIL(medium_ls_tablet_iter_.get_next_tablet(ls_handle, tablet_handle))) { if (OB_FAIL(medium_ls_tablet_iter_.get_next_tablet(ls_handle, tablet_handle))) {
if (OB_ITER_END == ret || OB_LS_NOT_EXIST == ret) { if (OB_ITER_END == ret) {
ret = OB_SUCCESS; ret = OB_SUCCESS;
break; break;
} else { } else if (OB_LS_NOT_EXIST != ret) {
LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_handle)); LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_handle));
} }
} else if (OB_UNLIKELY(!tablet_handle.is_valid())) { } else if (OB_UNLIKELY(!tablet_handle.is_valid())) {
@ -1122,10 +1123,11 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
} else if (OB_TMP_FAIL(schedule_ls_medium_merge( } else if (OB_TMP_FAIL(schedule_ls_medium_merge(
merge_version, ls_handle, ls_merge_finish, merge_version, ls_handle, ls_merge_finish,
all_ls_weak_read_ts_ready, schedule_tablet_cnt))) { all_ls_weak_read_ts_ready, schedule_tablet_cnt))) {
medium_ls_tablet_iter_.skip_cur_ls(); // for any errno, skip cur ls
tenant_merge_finish = false; tenant_merge_finish = false;
if (OB_SIZE_OVERFLOW == tmp_ret) { if (OB_SIZE_OVERFLOW == tmp_ret) {
break; break;
} else if (OB_LS_NOT_EXIST != tmp_ret) { } else if (!schedule_ignore_error(tmp_ret)) {
LOG_WARN("failed to schedule ls merge", K(tmp_ret), KPC(ls)); LOG_WARN("failed to schedule ls merge", K(tmp_ret), KPC(ls));
} }
} else { } else {
@ -1292,8 +1294,7 @@ int ObCompactionScheduleIterator::get_next_ls(ObLSHandle &ls_handle)
} else if (OB_FAIL((MTL(storage::ObLSService *)->get_ls(ls_ids_[ls_idx_], ls_handle, mod_)))) { } else if (OB_FAIL((MTL(storage::ObLSService *)->get_ls(ls_ids_[ls_idx_], ls_handle, mod_)))) {
if (OB_LS_NOT_EXIST == ret) { if (OB_LS_NOT_EXIST == ret) {
LOG_TRACE("ls not exist", K(ret), K(ls_idx_), K(ls_ids_[ls_idx_])); LOG_TRACE("ls not exist", K(ret), K(ls_idx_), K(ls_ids_[ls_idx_]));
++ls_idx_; skip_cur_ls();
tablet_ids_.reuse();
} else { } else {
LOG_WARN("failed to get ls", K(ret), K(ls_idx_), K(ls_ids_[ls_idx_])); LOG_WARN("failed to get ls", K(ret), K(ls_idx_), K(ls_ids_[ls_idx_]));
} }

View File

@ -78,6 +78,11 @@ public:
int get_next_tablet(ObLSHandle &ls_handle, ObTabletHandle &tablet_handle); int get_next_tablet(ObLSHandle &ls_handle, ObTabletHandle &tablet_handle);
void reset(); void reset();
bool is_valid() const; bool is_valid() const;
void skip_cur_ls()
{
++ls_idx_;
tablet_ids_.reuse();
}
OB_INLINE int64_t to_string(char *buf, const int64_t buf_len) const; OB_INLINE int64_t to_string(char *buf, const int64_t buf_len) const;
private: private:
static const int64_t LS_ID_ARRAY_CNT = 10; static const int64_t LS_ID_ARRAY_CNT = 10;
@ -148,7 +153,12 @@ public:
{ {
(void)ATOMIC_AAF(&error_tablet_cnt_, delta_cnt); (void)ATOMIC_AAF(&error_tablet_cnt_, delta_cnt);
} }
OB_INLINE bool schedule_ignore_error(const int ret)
{
return OB_ITER_END == ret
|| OB_STATE_NOT_MATCH == ret
|| OB_LS_NOT_EXIST == ret;
}
// major merge status control // major merge status control
void stop_major_merge(); void stop_major_merge();
void resume_major_merge(); void resume_major_merge();