fix compaction progress & add compacton diagnose info about ls locality
This commit is contained in:
@ -379,6 +379,30 @@ int ObCompactionDiagnoseMgr::get_suspect_info(
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObCompactionDiagnoseMgr::diagnose_ls_merge(
|
||||
const ObMergeType merge_type,
|
||||
const ObLSID &ls_id)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObScheduleSuspectInfo ret_info;
|
||||
if (OB_FAIL(get_suspect_info(merge_type, ls_id, ObTabletID(INT64_MAX), ret_info))) {
|
||||
if (OB_HASH_NOT_EXIST != ret) {
|
||||
LOG_WARN("failed get ls merge suspect info", K(ret), K(ls_id));
|
||||
}
|
||||
} else if (can_add_diagnose_info()) {
|
||||
SET_DIAGNOSE_INFO(
|
||||
info_array_[idx_++],
|
||||
merge_type,
|
||||
ret_info.tenant_id_,
|
||||
ls_id,
|
||||
ObTabletID(INT64_MAX),
|
||||
ObCompactionDiagnoseInfo::DIA_STATUS_FAILED,
|
||||
ret_info.add_time_,
|
||||
"schedule_suspect_info", ret_info.suspect_info_);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObCompactionDiagnoseMgr::diagnose_tenant_tablet()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -476,21 +500,13 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet()
|
||||
compaction_scn);
|
||||
}
|
||||
// check ls suspect info for memtable freezing
|
||||
ObScheduleSuspectInfo ret_info;
|
||||
if (OB_TMP_FAIL(get_suspect_info(MINI_MERGE, ls_id, ObTabletID(INT64_MAX), ret_info))) {
|
||||
if (OB_HASH_NOT_EXIST != tmp_ret) {
|
||||
LOG_WARN("failed get ls merge suspect info", K(tmp_ret), K(ls_id));
|
||||
}
|
||||
} else if (can_add_diagnose_info()) {
|
||||
SET_DIAGNOSE_INFO(
|
||||
info_array_[idx_++],
|
||||
MINI_MERGE,
|
||||
ret_info.tenant_id_,
|
||||
ls_id,
|
||||
ObTabletID(INT64_MAX),
|
||||
ObCompactionDiagnoseInfo::DIA_STATUS_FAILED,
|
||||
ret_info.add_time_,
|
||||
"schedule_suspect_info", ret_info.suspect_info_);
|
||||
if (OB_TMP_FAIL(diagnose_ls_merge(MINI_MERGE, ls_id))) {
|
||||
LOG_WARN("failed to diagnose about memtable freezing", K(tmp_ret));
|
||||
}
|
||||
|
||||
// check ls locality change and leader change
|
||||
if (OB_TMP_FAIL(diagnose_ls_merge(MEDIUM_MERGE, ls_id))) {
|
||||
LOG_WARN("failed to diagnose about ls locality change", K(tmp_ret));
|
||||
}
|
||||
ObLSTabletIterator tablet_iter(ObTabletCommon::NO_CHECK_GET_TABLET_TIMEOUT_US);
|
||||
ObLSVTInfo ls_info;
|
||||
@ -553,7 +569,7 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet()
|
||||
ObTabletID(INT64_MAX),
|
||||
ObCompactionDiagnoseInfo::DIA_STATUS_FINISH,
|
||||
ObTimeUtility::fast_current_time(),
|
||||
"test: compaction has finished in storage, please check RS. compaction_scn", compaction_scn);
|
||||
"compaction has finished in storage, please check RS. compaction_scn", compaction_scn);
|
||||
if (!abnormal_ls_id.empty()) {
|
||||
char * buf = info.diagnose_info_;
|
||||
const int64_t buf_len = common::OB_DIAGNOSE_INFO_LENGTH;
|
||||
|
||||
@ -130,6 +130,9 @@ public:
|
||||
ObDiagnoseTabletCompProgress &input_progress);
|
||||
static int check_system_compaction_config(char *tmp_str, const int64_t buf_len);
|
||||
private:
|
||||
int diagnose_ls_merge(
|
||||
const ObMergeType merge_type,
|
||||
const ObLSID &ls_id);
|
||||
int diagnose_tablet_mini_merge(const ObLSID &ls_id, ObTablet &tablet);
|
||||
int diagnose_tablet_minor_merge(const ObLSID &ls_id, ObTablet &tablet);
|
||||
int diagnose_tablet_medium_merge(
|
||||
@ -209,20 +212,21 @@ private:
|
||||
|
||||
#define DEL_SUSPECT_INFO(type, ls_id, tablet_id) \
|
||||
{ \
|
||||
int tmp_ret = OB_SUCCESS; \
|
||||
compaction::ObMergeDagHash dag_hash; \
|
||||
dag_hash.merge_type_ = type; \
|
||||
dag_hash.ls_id_ = ls_id; \
|
||||
dag_hash.tablet_id_ = tablet_id; \
|
||||
int64_t tenant_id = MTL_ID(); \
|
||||
int64_t hash_value = ObScheduleSuspectInfo::gen_hash(tenant_id, dag_hash.inner_hash()); \
|
||||
if (OB_FAIL(ObScheduleSuspectInfoMgr::get_instance().del_suspect_info(hash_value))) { \
|
||||
if (OB_HASH_NOT_EXIST != ret) { \
|
||||
STORAGE_LOG(WARN, "failed to add suspect info", K(ret), K(dag_hash), K(tenant_id)); \
|
||||
if (OB_TMP_FAIL(ObScheduleSuspectInfoMgr::get_instance().del_suspect_info(hash_value))) { \
|
||||
if (OB_HASH_NOT_EXIST != tmp_ret) { \
|
||||
STORAGE_LOG(WARN, "failed to add suspect info", K(tmp_ret), K(dag_hash), K(tenant_id)); \
|
||||
} else { \
|
||||
ret = OB_SUCCESS; \
|
||||
tmp_ret = OB_SUCCESS; \
|
||||
} \
|
||||
} else { \
|
||||
STORAGE_LOG(DEBUG, "success to add suspect info", K(ret), K(dag_hash), K(tenant_id)); \
|
||||
STORAGE_LOG(DEBUG, "success to add suspect info", K(tmp_ret), K(dag_hash), K(tenant_id)); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
@ -921,7 +921,6 @@ int ObTenantTabletScheduler::schedule_ls_minor_merge(
|
||||
int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
||||
int64_t &merge_version,
|
||||
ObLSHandle &ls_handle,
|
||||
bool &ls_merge_finish,
|
||||
bool &all_ls_weak_read_ts_ready,
|
||||
int64_t &schedule_tablet_cnt)
|
||||
{
|
||||
@ -968,6 +967,11 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
||||
is_leader = true;
|
||||
if (OB_FAIL(ls_locality_cache_.get_ls_locality(ls_id, ls_locality))) {
|
||||
LOG_WARN("failed to get ls locality", K(ret), K(ls_id));
|
||||
} else if (0 == ls_locality.svr_addr_list_.count()) {
|
||||
ADD_SUSPECT_INFO(MEDIUM_MERGE, ls_id, ObTabletID(INT64_MAX),
|
||||
"maybe bad case: locality change and leader change", K(ls_locality));
|
||||
} else {
|
||||
DEL_SUSPECT_INFO(MEDIUM_MERGE, ls_id, ObTabletID(INT64_MAX));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -1048,7 +1052,7 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
||||
LOG_WARN("failed to schedule medium", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||
}
|
||||
}
|
||||
ls_merge_finish &= tablet_merge_finish;
|
||||
medium_ls_tablet_iter_.update_merge_finish(tablet_merge_finish);
|
||||
}
|
||||
} // end of while
|
||||
} // else
|
||||
@ -1076,7 +1080,6 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
||||
} else if (!medium_ls_tablet_iter_.is_valid() && OB_FAIL(medium_ls_tablet_iter_.build_iter())) {
|
||||
LOG_WARN("failed to init iterator", K(ret));
|
||||
} else {
|
||||
bool tenant_merge_finish = true;
|
||||
bool all_ls_weak_read_ts_ready = true;
|
||||
bool check_report_scn_flag = false;
|
||||
int64_t merge_version = get_frozen_version();
|
||||
@ -1109,7 +1112,6 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
||||
#endif
|
||||
|
||||
while (OB_SUCC(ret) && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) {
|
||||
bool ls_merge_finish = true;
|
||||
if (OB_FAIL(medium_ls_tablet_iter_.get_next_ls(ls_handle))) {
|
||||
if (OB_ITER_END == ret) {
|
||||
ret = OB_SUCCESS;
|
||||
@ -1121,18 +1123,16 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("ls is null", K(ret), K(ls));
|
||||
} else if (OB_TMP_FAIL(schedule_ls_medium_merge(
|
||||
merge_version, ls_handle, ls_merge_finish,
|
||||
merge_version, ls_handle,
|
||||
all_ls_weak_read_ts_ready, schedule_tablet_cnt))) {
|
||||
medium_ls_tablet_iter_.skip_cur_ls(); // for any errno, skip cur ls
|
||||
tenant_merge_finish = false;
|
||||
medium_ls_tablet_iter_.update_merge_finish(false);
|
||||
if (OB_SIZE_OVERFLOW == tmp_ret) {
|
||||
break;
|
||||
} else if (!schedule_ignore_error(tmp_ret)) {
|
||||
LOG_WARN("failed to schedule ls merge", K(tmp_ret), KPC(ls));
|
||||
}
|
||||
} else {
|
||||
tenant_merge_finish &= ls_merge_finish;
|
||||
|
||||
// loop tablet_meta table to update smaller report_scn because of migration
|
||||
if (check_report_scn_flag) {
|
||||
(void) update_report_scn_as_ls_leader(*ls);
|
||||
@ -1140,9 +1140,9 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
||||
}
|
||||
} // end while
|
||||
|
||||
if (!tenant_merge_finish) { // wait major compaction
|
||||
if (!medium_ls_tablet_iter_.tenant_merge_finish()) { // wait major compaction
|
||||
if (all_ls_weak_read_ts_ready) { // check schedule Timer Task
|
||||
if (schedule_stats_.add_weak_read_ts_event_flag_) {
|
||||
if (schedule_stats_.add_weak_read_ts_event_flag_ && medium_ls_tablet_iter_.is_scan_finish()) { // all ls scan finish
|
||||
schedule_stats_.add_weak_read_ts_event_flag_ = false;
|
||||
ADD_COMPACTION_EVENT(
|
||||
MTL_ID(),
|
||||
@ -1159,7 +1159,7 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
||||
}
|
||||
}
|
||||
|
||||
if (REACH_TENANT_TIME_INTERVAL(ADD_LOOP_EVENT_INTERVAL)) {
|
||||
if (medium_ls_tablet_iter_.is_scan_finish() && REACH_TENANT_TIME_INTERVAL(ADD_LOOP_EVENT_INTERVAL)) {
|
||||
ADD_COMPACTION_EVENT(
|
||||
MTL_ID(),
|
||||
MAJOR_MERGE,
|
||||
@ -1171,7 +1171,7 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret) && tenant_merge_finish && merge_version > merged_version_) {
|
||||
if (OB_SUCC(ret) && medium_ls_tablet_iter_.tenant_merge_finish() && merge_version > merged_version_) {
|
||||
merged_version_ = merge_version;
|
||||
LOG_INFO("all tablet major merge finish", K(merged_version_), K(merge_version));
|
||||
DEL_SUSPECT_INFO(MEDIUM_MERGE, share::ObLSID(INT64_MAX), ObTabletID(INT64_MAX));
|
||||
@ -1194,9 +1194,12 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
||||
reload_tenant_config(); // tenant merge finish, use tenant default config to loop
|
||||
}
|
||||
|
||||
LOG_INFO("finish schedule all tablet merge", K(merge_version), K(schedule_stats_), K(tenant_merge_finish),
|
||||
LOG_INFO("finish schedule all tablet merge", K(merge_version), K(schedule_stats_),
|
||||
"tenant_merge_finish", medium_ls_tablet_iter_.tenant_merge_finish(),
|
||||
K(merged_version_), K(schedule_tablet_cnt));
|
||||
schedule_stats_.clear_tablet_cnt();
|
||||
if (medium_ls_tablet_iter_.is_scan_finish()) {
|
||||
schedule_stats_.clear_tablet_cnt();
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -1275,6 +1278,8 @@ int ObCompactionScheduleIterator::build_iter()
|
||||
ls_idx_ = -1;
|
||||
tablet_idx_ = 0;
|
||||
tablet_ids_.reuse();
|
||||
scan_finish_ = false;
|
||||
merge_finish_ = true;
|
||||
LOG_TRACE("build iter", K(ret), K(ls_ids_));
|
||||
}
|
||||
return ret;
|
||||
@ -1290,6 +1295,7 @@ int ObCompactionScheduleIterator::get_next_ls(ObLSHandle &ls_handle)
|
||||
}
|
||||
do {
|
||||
if (ls_idx_ >= ls_ids_.count()) {
|
||||
scan_finish_ = true;
|
||||
ret = OB_ITER_END;
|
||||
} else if (OB_FAIL((MTL(storage::ObLSService *)->get_ls(ls_ids_[ls_idx_], ls_handle, mod_)))) {
|
||||
if (OB_LS_NOT_EXIST == ret) {
|
||||
@ -1311,6 +1317,8 @@ void ObCompactionScheduleIterator::reset()
|
||||
tablet_idx_ = 0;
|
||||
ls_ids_.reuse();
|
||||
tablet_ids_.reuse();
|
||||
scan_finish_ = false;
|
||||
merge_finish_ = false;
|
||||
}
|
||||
|
||||
bool ObCompactionScheduleIterator::is_valid() const
|
||||
|
||||
@ -66,6 +66,8 @@ public:
|
||||
const int64_t timeout_us = ObTabletCommon::DIRECT_GET_COMMITTED_TABLET_TIMEOUT_US)
|
||||
: mod_(mod),
|
||||
is_major_(is_major),
|
||||
scan_finish_(false),
|
||||
merge_finish_(false),
|
||||
timeout_us_(timeout_us),
|
||||
ls_idx_(0),
|
||||
tablet_idx_(0),
|
||||
@ -76,6 +78,11 @@ public:
|
||||
int build_iter();
|
||||
int get_next_ls(ObLSHandle &ls_handle);
|
||||
int get_next_tablet(ObLSHandle &ls_handle, ObTabletHandle &tablet_handle);
|
||||
bool is_scan_finish() const { return scan_finish_; }
|
||||
bool tenant_merge_finish() const { return merge_finish_ & scan_finish_; }
|
||||
void update_merge_finish(bool merge_finish) {
|
||||
merge_finish_ &= merge_finish;
|
||||
}
|
||||
void reset();
|
||||
bool is_valid() const;
|
||||
void skip_cur_ls()
|
||||
@ -89,6 +96,8 @@ private:
|
||||
static const int64_t TABLET_ID_ARRAY_CNT = 2000;
|
||||
ObLSGetMod mod_;
|
||||
bool is_major_;
|
||||
bool scan_finish_;
|
||||
bool merge_finish_;
|
||||
int64_t timeout_us_;
|
||||
int64_t ls_idx_;
|
||||
uint64_t tablet_idx_;
|
||||
@ -219,7 +228,6 @@ private:
|
||||
int schedule_ls_medium_merge(
|
||||
int64_t &merge_version,
|
||||
ObLSHandle &ls_handle,
|
||||
bool &ls_merge_finish,
|
||||
bool &all_ls_weak_read_ts_ready,
|
||||
int64_t &schedule_tablet_cnt);
|
||||
int schedule_ls_minor_merge(
|
||||
|
||||
Reference in New Issue
Block a user