add no major diagnose

This commit is contained in:
a1iive
2023-12-28 04:13:25 +00:00
committed by ob-robot
parent 7099477643
commit 9c9e4a8d1f
2 changed files with 61 additions and 54 deletions

View File

@ -539,6 +539,41 @@ ADD_DIAGNOSE_INFO(merge_type, ls_id, tablet_id, status, time, __VA_ARGS__)
#define ADD_COMMON_DIAGNOSE_INFO(merge_type, status, time, ...) \
ADD_DIAGNOSE_INFO(merge_type, UNKNOW_LS_ID, UNKNOW_TABLET_ID, status, time, __VA_ARGS__)
#define ADD_MAJOR_WAIT_SCHEDULE(time, info) \
if (ObTimeUtility::current_time_ns() > time) { \
if (DIAGNOSE_TABELT_MAX_COUNT > diagnose_tablet_count_[COMPACTION_DIAGNOSE_MAJOR_NOT_SCHEDULE] \
&& can_add_diagnose_info() \
&& OB_TMP_FAIL(ADD_DIAGNOSE_INFO_FOR_TABLET( \
MAJOR_MERGE, \
gen_diagnose_status(compaction_scn), \
ObTimeUtility::fast_current_time(), \
"major not schedule for long time", info, \
"max_receive_medium_snapshot", max_sync_medium_scn, \
"compaction_scn", compaction_scn, \
"tablet_snapshot", tablet.get_snapshot_version(), \
"last_major_scn", last_major_snapshot_version))) { \
LOG_WARN("failed to add diagnose info", K(ret), K(ls_id), K(tablet_id)); \
} \
++diagnose_tablet_count_[COMPACTION_DIAGNOSE_MAJOR_NOT_SCHEDULE]; \
}
#define ADD_MEDIUM_WAIT_SCHEDULE(time, info) \
if (ObTimeUtility::current_time_ns() > time) { \
if (DIAGNOSE_TABELT_MAX_COUNT > diagnose_tablet_count_[COMPACTION_DIAGNOSE_MEDIUM_NOT_SCHEDULE] \
&& can_add_diagnose_info() \
&& OB_TMP_FAIL(ADD_DIAGNOSE_INFO_FOR_TABLET( \
MEDIUM_MERGE, \
gen_diagnose_status(max_sync_medium_scn), \
ObTimeUtility::fast_current_time(), \
"medium not schedule for long time", info,\
"max_receive_medium_scn", max_sync_medium_scn, \
"tablet_snapshot", tablet.get_snapshot_version(), \
"last_major_scn", last_major_snapshot_version))) { \
LOG_WARN("failed to add diagnose info", K(ret), K(ls_id), K(tablet_id)); \
} \
++diagnose_tablet_count_[COMPACTION_DIAGNOSE_MEDIUM_NOT_SCHEDULE]; \
}
const char *ObCompactionDiagnoseInfo::ObDiagnoseStatusStr[DIA_STATUS_MAX] = {
"NOT_SCHEDULE",
"RUNNING",
@ -721,8 +756,6 @@ int ObCompactionDiagnoseMgr::get_and_set_suspect_info(
LOG_WARN("failed to add dignose info", K(ret), K(tmp_str));
} else if (OB_HASH_NOT_EXIST != ret) {
LOG_WARN("failed get suspect info", K(ret), K(ls_id));
} else {
ret = OB_SUCCESS;
}
return ret;
}
@ -844,9 +877,8 @@ void ObCompactionDiagnoseMgr::diagnose_tenant_ls(
LOG_WARN_RET(tmp_ret, "failed to add dignose info about weak read ts", K(tmp_ret), K(compaction_scn));
}
// check ls suspect info for memtable freezing // ls level
if (OB_TMP_FAIL(get_and_set_suspect_info(MINI_MERGE, ls_id, UNKNOW_TABLET_ID))) {
LOG_WARN_RET(tmp_ret, "failed to add dignose info about ls suspect info for memtable freezing", K(tmp_ret));
}
(void) get_and_set_suspect_info(MINI_MERGE, ls_id, UNKNOW_TABLET_ID);
// check ls suspect info for ls locality change
if (is_leader && MTL(ObTenantMediumChecker*)->locality_cache_empty()) {
if (can_add_diagnose_info()
@ -1270,8 +1302,8 @@ int ObCompactionDiagnoseMgr::diagnose_tablet_mini_merge(
tablet))) {
LOG_WARN("diagnose failed", K(tmp_ret), K(ls_id), "tablet_id", tablet.get_tablet_meta().tablet_id_, KPC(latest_sstable));
}
} else if (OB_TMP_FAIL(get_and_set_suspect_info(MINI_MERGE, ls_id, tablet_id))) { // mini compaction finish, but memtable have not release
LOG_WARN("failed to add dignose info about memtable release", K(tmp_ret));
} else {
(void) get_and_set_suspect_info(MINI_MERGE, ls_id, tablet_id);
}
}
return ret;
@ -1315,44 +1347,25 @@ int ObCompactionDiagnoseMgr::diagnose_tablet_major_merge(
} else {
LOG_TRACE("diagnose tablet major merge", K(ls_id), K(tablet_id), K(compaction_scn), K(max_sync_medium_scn), K(last_major_snapshot_version));
if (last_major_snapshot_version < compaction_scn) {
// max_sync_medium_scn > last_major_snapshot_version means last compaction is not finished,
// this will be diagnosed in diagnose_tablet_medium_merge
if (max_sync_medium_scn < compaction_scn
&& max_sync_medium_scn == last_major_snapshot_version) {
// now last compaction finish
if (OB_TMP_FAIL(get_and_set_suspect_info(MEDIUM_MERGE, ls_id, tablet_id))) {
LOG_WARN("failed get major merge suspect info", K(ret), K(ls_id));
}
if (OB_HASH_NOT_EXIST == tmp_ret
&& ObTimeUtility::current_time_ns() > compaction_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL * 2) {
if (DIAGNOSE_TABELT_MAX_COUNT > diagnose_tablet_count_[COMPACTION_DIAGNOSE_MAJOR_NOT_SCHEDULE]
&& can_add_diagnose_info()
&& OB_TMP_FAIL(ADD_DIAGNOSE_INFO_FOR_TABLET(
MAJOR_MERGE,
gen_diagnose_status(compaction_scn),
ObTimeUtility::fast_current_time(),
"major not schedule for long time, interval", static_cast<int64_t>(WAIT_MEDIUM_SCHEDULE_INTERVAL * 2 / NS_TIME),
"max_receive_medium_snapshot", max_sync_medium_scn,
"compaction_scn", compaction_scn))) {
LOG_WARN("failed to add diagnose info", K(ret), K(ls_id), K(tablet_id));
}
++diagnose_tablet_count_[COMPACTION_DIAGNOSE_MAJOR_NOT_SCHEDULE];
}
} else if (max_sync_medium_scn >= compaction_scn) {
if (tablet.get_snapshot_version() < compaction_scn) { // wait mini compaction or tablet freeze
if (ObTimeUtility::current_time_ns() > compaction_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL) {
if (DIAGNOSE_TABELT_MAX_COUNT > diagnose_tablet_count_[COMPACTION_DIAGNOSE_MAJOR_NOT_SCHEDULE] && can_add_diagnose_info()) {
ADD_DIAGNOSE_INFO_FOR_TABLET(
MAJOR_MERGE,
gen_diagnose_status(compaction_scn),
ObTimeUtility::fast_current_time(),
"major wait for freeze, interval", static_cast<int64_t>(WAIT_MEDIUM_SCHEDULE_INTERVAL / NS_TIME),
"compaction_scn", compaction_scn,
"tablet_snapshot", tablet.get_snapshot_version());
if (max_sync_medium_scn < compaction_scn) {
// max_sync_medium_scn > last_major_snapshot_version means last compaction is not finished,
// this will be diagnosed in diagnose_tablet_medium_merge
if (max_sync_medium_scn == last_major_snapshot_version) {
// now last compaction finish
if (OB_HASH_NOT_EXIST == get_and_set_suspect_info(MEDIUM_MERGE, ls_id, tablet_id)) {
const char *info = "medium info behind major";
if (0 == last_major_snapshot_version) {
info = "medium info behind major & no major sstable";
}
ADD_MAJOR_WAIT_SCHEDULE(compaction_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL * 2, info);
}
++diagnose_tablet_count_[COMPACTION_DIAGNOSE_MAJOR_NOT_SCHEDULE];
}
} else if (tablet.get_snapshot_version() < compaction_scn) { // wait mini compaction or tablet freeze
const char* info = "major wait for freeze";
ADD_MAJOR_WAIT_SCHEDULE(compaction_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL, info);
} else if (0 == last_major_snapshot_version) {
const char* info = "no major sstable";
ADD_MAJOR_WAIT_SCHEDULE(compaction_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL, info);
}
if (OB_TMP_FAIL(diagnose_tablet_merge(
MEDIUM_MERGE,
@ -1388,18 +1401,11 @@ int ObCompactionDiagnoseMgr::diagnose_tablet_medium_merge(
if (!diagnose_major_flag || (diagnose_major_flag && max_sync_medium_scn < compaction_scn)) {
if (max_sync_medium_scn > last_major_snapshot_version) {
if (tablet.get_snapshot_version() < max_sync_medium_scn) { // wait mini compaction or tablet freeze
if (ObTimeUtility::current_time_ns() > max_sync_medium_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL) {
if (DIAGNOSE_TABELT_MAX_COUNT > diagnose_tablet_count_[COMPACTION_DIAGNOSE_MEDIUM_NOT_SCHEDULE] && can_add_diagnose_info()) {
ADD_DIAGNOSE_INFO_FOR_TABLET(
MEDIUM_MERGE,
gen_diagnose_status(max_sync_medium_scn),
ObTimeUtility::fast_current_time(),
"medium wait for freeze, interval", static_cast<int64_t>(WAIT_MEDIUM_SCHEDULE_INTERVAL / NS_TIME),
"max_receive_medium_scn", max_sync_medium_scn,
"tablet_snapshot", tablet.get_snapshot_version());
}
++diagnose_tablet_count_[COMPACTION_DIAGNOSE_MEDIUM_NOT_SCHEDULE];
}
const char *info = "medium wait for freeze";
ADD_MEDIUM_WAIT_SCHEDULE(max_sync_medium_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL, info);
} else if (0 == last_major_snapshot_version) {
const char *info = "no major sstable";
ADD_MEDIUM_WAIT_SCHEDULE(max_sync_medium_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL, info);
} else if (OB_TMP_FAIL(diagnose_tablet_merge(
MEDIUM_MERGE,
ls_id,

View File

@ -1678,6 +1678,7 @@ int ObTenantTabletScheduler::schedule_tablet_medium(
K(could_major_merge), K(enable_adaptive_compaction), K(tablet_could_schedule_merge));
if (OB_FAIL(ret) || !is_leader || 0 >= last_major_snapshot_version) {
// follower or no major: do nothing
need_diagnose = true;
} else if (OB_FAIL(tablet.read_medium_info_list(tmp_allocator, medium_list))) {
LOG_WARN("failed to load medium info list", K(ret), K(tablet_id));
} else if (FALSE_IT(time_guard.click(ObCompactionScheduleTimeGuard::READ_MEDIUM_INFO))){