fix compaction progress & add compaction diagnose info about ls locality
This commit is contained in:
@ -379,6 +379,30 @@ int ObCompactionDiagnoseMgr::get_suspect_info(
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ObCompactionDiagnoseMgr::diagnose_ls_merge(
|
||||||
|
const ObMergeType merge_type,
|
||||||
|
const ObLSID &ls_id)
|
||||||
|
{
|
||||||
|
int ret = OB_SUCCESS;
|
||||||
|
ObScheduleSuspectInfo ret_info;
|
||||||
|
if (OB_FAIL(get_suspect_info(merge_type, ls_id, ObTabletID(INT64_MAX), ret_info))) {
|
||||||
|
if (OB_HASH_NOT_EXIST != ret) {
|
||||||
|
LOG_WARN("failed get ls merge suspect info", K(ret), K(ls_id));
|
||||||
|
}
|
||||||
|
} else if (can_add_diagnose_info()) {
|
||||||
|
SET_DIAGNOSE_INFO(
|
||||||
|
info_array_[idx_++],
|
||||||
|
merge_type,
|
||||||
|
ret_info.tenant_id_,
|
||||||
|
ls_id,
|
||||||
|
ObTabletID(INT64_MAX),
|
||||||
|
ObCompactionDiagnoseInfo::DIA_STATUS_FAILED,
|
||||||
|
ret_info.add_time_,
|
||||||
|
"schedule_suspect_info", ret_info.suspect_info_);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int ObCompactionDiagnoseMgr::diagnose_tenant_tablet()
|
int ObCompactionDiagnoseMgr::diagnose_tenant_tablet()
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
@ -476,21 +500,13 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet()
|
|||||||
compaction_scn);
|
compaction_scn);
|
||||||
}
|
}
|
||||||
// check ls suspect info for memtable freezing
|
// check ls suspect info for memtable freezing
|
||||||
ObScheduleSuspectInfo ret_info;
|
if (OB_TMP_FAIL(diagnose_ls_merge(MINI_MERGE, ls_id))) {
|
||||||
if (OB_TMP_FAIL(get_suspect_info(MINI_MERGE, ls_id, ObTabletID(INT64_MAX), ret_info))) {
|
LOG_WARN("failed to diagnose about memtable freezing", K(tmp_ret));
|
||||||
if (OB_HASH_NOT_EXIST != tmp_ret) {
|
|
||||||
LOG_WARN("failed get ls merge suspect info", K(tmp_ret), K(ls_id));
|
|
||||||
}
|
}
|
||||||
} else if (can_add_diagnose_info()) {
|
|
||||||
SET_DIAGNOSE_INFO(
|
// check ls locality change and leader change
|
||||||
info_array_[idx_++],
|
if (OB_TMP_FAIL(diagnose_ls_merge(MEDIUM_MERGE, ls_id))) {
|
||||||
MINI_MERGE,
|
LOG_WARN("failed to diagnose about ls locality change", K(tmp_ret));
|
||||||
ret_info.tenant_id_,
|
|
||||||
ls_id,
|
|
||||||
ObTabletID(INT64_MAX),
|
|
||||||
ObCompactionDiagnoseInfo::DIA_STATUS_FAILED,
|
|
||||||
ret_info.add_time_,
|
|
||||||
"schedule_suspect_info", ret_info.suspect_info_);
|
|
||||||
}
|
}
|
||||||
ObLSTabletIterator tablet_iter(ObTabletCommon::NO_CHECK_GET_TABLET_TIMEOUT_US);
|
ObLSTabletIterator tablet_iter(ObTabletCommon::NO_CHECK_GET_TABLET_TIMEOUT_US);
|
||||||
ObLSVTInfo ls_info;
|
ObLSVTInfo ls_info;
|
||||||
@ -553,7 +569,7 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet()
|
|||||||
ObTabletID(INT64_MAX),
|
ObTabletID(INT64_MAX),
|
||||||
ObCompactionDiagnoseInfo::DIA_STATUS_FINISH,
|
ObCompactionDiagnoseInfo::DIA_STATUS_FINISH,
|
||||||
ObTimeUtility::fast_current_time(),
|
ObTimeUtility::fast_current_time(),
|
||||||
"test: compaction has finished in storage, please check RS. compaction_scn", compaction_scn);
|
"compaction has finished in storage, please check RS. compaction_scn", compaction_scn);
|
||||||
if (!abnormal_ls_id.empty()) {
|
if (!abnormal_ls_id.empty()) {
|
||||||
char * buf = info.diagnose_info_;
|
char * buf = info.diagnose_info_;
|
||||||
const int64_t buf_len = common::OB_DIAGNOSE_INFO_LENGTH;
|
const int64_t buf_len = common::OB_DIAGNOSE_INFO_LENGTH;
|
||||||
|
|||||||
@ -130,6 +130,9 @@ public:
|
|||||||
ObDiagnoseTabletCompProgress &input_progress);
|
ObDiagnoseTabletCompProgress &input_progress);
|
||||||
static int check_system_compaction_config(char *tmp_str, const int64_t buf_len);
|
static int check_system_compaction_config(char *tmp_str, const int64_t buf_len);
|
||||||
private:
|
private:
|
||||||
|
int diagnose_ls_merge(
|
||||||
|
const ObMergeType merge_type,
|
||||||
|
const ObLSID &ls_id);
|
||||||
int diagnose_tablet_mini_merge(const ObLSID &ls_id, ObTablet &tablet);
|
int diagnose_tablet_mini_merge(const ObLSID &ls_id, ObTablet &tablet);
|
||||||
int diagnose_tablet_minor_merge(const ObLSID &ls_id, ObTablet &tablet);
|
int diagnose_tablet_minor_merge(const ObLSID &ls_id, ObTablet &tablet);
|
||||||
int diagnose_tablet_medium_merge(
|
int diagnose_tablet_medium_merge(
|
||||||
@ -209,20 +212,21 @@ private:
|
|||||||
|
|
||||||
#define DEL_SUSPECT_INFO(type, ls_id, tablet_id) \
|
#define DEL_SUSPECT_INFO(type, ls_id, tablet_id) \
|
||||||
{ \
|
{ \
|
||||||
|
int tmp_ret = OB_SUCCESS; \
|
||||||
compaction::ObMergeDagHash dag_hash; \
|
compaction::ObMergeDagHash dag_hash; \
|
||||||
dag_hash.merge_type_ = type; \
|
dag_hash.merge_type_ = type; \
|
||||||
dag_hash.ls_id_ = ls_id; \
|
dag_hash.ls_id_ = ls_id; \
|
||||||
dag_hash.tablet_id_ = tablet_id; \
|
dag_hash.tablet_id_ = tablet_id; \
|
||||||
int64_t tenant_id = MTL_ID(); \
|
int64_t tenant_id = MTL_ID(); \
|
||||||
int64_t hash_value = ObScheduleSuspectInfo::gen_hash(tenant_id, dag_hash.inner_hash()); \
|
int64_t hash_value = ObScheduleSuspectInfo::gen_hash(tenant_id, dag_hash.inner_hash()); \
|
||||||
if (OB_FAIL(ObScheduleSuspectInfoMgr::get_instance().del_suspect_info(hash_value))) { \
|
if (OB_TMP_FAIL(ObScheduleSuspectInfoMgr::get_instance().del_suspect_info(hash_value))) { \
|
||||||
if (OB_HASH_NOT_EXIST != ret) { \
|
if (OB_HASH_NOT_EXIST != tmp_ret) { \
|
||||||
STORAGE_LOG(WARN, "failed to add suspect info", K(ret), K(dag_hash), K(tenant_id)); \
|
STORAGE_LOG(WARN, "failed to add suspect info", K(tmp_ret), K(dag_hash), K(tenant_id)); \
|
||||||
} else { \
|
} else { \
|
||||||
ret = OB_SUCCESS; \
|
tmp_ret = OB_SUCCESS; \
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
STORAGE_LOG(DEBUG, "success to add suspect info", K(ret), K(dag_hash), K(tenant_id)); \
|
STORAGE_LOG(DEBUG, "success to add suspect info", K(tmp_ret), K(dag_hash), K(tenant_id)); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -921,7 +921,6 @@ int ObTenantTabletScheduler::schedule_ls_minor_merge(
|
|||||||
int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
||||||
int64_t &merge_version,
|
int64_t &merge_version,
|
||||||
ObLSHandle &ls_handle,
|
ObLSHandle &ls_handle,
|
||||||
bool &ls_merge_finish,
|
|
||||||
bool &all_ls_weak_read_ts_ready,
|
bool &all_ls_weak_read_ts_ready,
|
||||||
int64_t &schedule_tablet_cnt)
|
int64_t &schedule_tablet_cnt)
|
||||||
{
|
{
|
||||||
@ -968,6 +967,11 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
|||||||
is_leader = true;
|
is_leader = true;
|
||||||
if (OB_FAIL(ls_locality_cache_.get_ls_locality(ls_id, ls_locality))) {
|
if (OB_FAIL(ls_locality_cache_.get_ls_locality(ls_id, ls_locality))) {
|
||||||
LOG_WARN("failed to get ls locality", K(ret), K(ls_id));
|
LOG_WARN("failed to get ls locality", K(ret), K(ls_id));
|
||||||
|
} else if (0 == ls_locality.svr_addr_list_.count()) {
|
||||||
|
ADD_SUSPECT_INFO(MEDIUM_MERGE, ls_id, ObTabletID(INT64_MAX),
|
||||||
|
"maybe bad case: locality change and leader change", K(ls_locality));
|
||||||
|
} else {
|
||||||
|
DEL_SUSPECT_INFO(MEDIUM_MERGE, ls_id, ObTabletID(INT64_MAX));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -1048,7 +1052,7 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
|||||||
LOG_WARN("failed to schedule medium", K(tmp_ret), K(ls_id), K(tablet_id));
|
LOG_WARN("failed to schedule medium", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ls_merge_finish &= tablet_merge_finish;
|
medium_ls_tablet_iter_.update_merge_finish(tablet_merge_finish);
|
||||||
}
|
}
|
||||||
} // end of while
|
} // end of while
|
||||||
} // else
|
} // else
|
||||||
@ -1076,7 +1080,6 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
|||||||
} else if (!medium_ls_tablet_iter_.is_valid() && OB_FAIL(medium_ls_tablet_iter_.build_iter())) {
|
} else if (!medium_ls_tablet_iter_.is_valid() && OB_FAIL(medium_ls_tablet_iter_.build_iter())) {
|
||||||
LOG_WARN("failed to init iterator", K(ret));
|
LOG_WARN("failed to init iterator", K(ret));
|
||||||
} else {
|
} else {
|
||||||
bool tenant_merge_finish = true;
|
|
||||||
bool all_ls_weak_read_ts_ready = true;
|
bool all_ls_weak_read_ts_ready = true;
|
||||||
bool check_report_scn_flag = false;
|
bool check_report_scn_flag = false;
|
||||||
int64_t merge_version = get_frozen_version();
|
int64_t merge_version = get_frozen_version();
|
||||||
@ -1109,7 +1112,6 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
while (OB_SUCC(ret) && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) {
|
while (OB_SUCC(ret) && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) {
|
||||||
bool ls_merge_finish = true;
|
|
||||||
if (OB_FAIL(medium_ls_tablet_iter_.get_next_ls(ls_handle))) {
|
if (OB_FAIL(medium_ls_tablet_iter_.get_next_ls(ls_handle))) {
|
||||||
if (OB_ITER_END == ret) {
|
if (OB_ITER_END == ret) {
|
||||||
ret = OB_SUCCESS;
|
ret = OB_SUCCESS;
|
||||||
@ -1121,18 +1123,16 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
|||||||
ret = OB_ERR_UNEXPECTED;
|
ret = OB_ERR_UNEXPECTED;
|
||||||
LOG_WARN("ls is null", K(ret), K(ls));
|
LOG_WARN("ls is null", K(ret), K(ls));
|
||||||
} else if (OB_TMP_FAIL(schedule_ls_medium_merge(
|
} else if (OB_TMP_FAIL(schedule_ls_medium_merge(
|
||||||
merge_version, ls_handle, ls_merge_finish,
|
merge_version, ls_handle,
|
||||||
all_ls_weak_read_ts_ready, schedule_tablet_cnt))) {
|
all_ls_weak_read_ts_ready, schedule_tablet_cnt))) {
|
||||||
medium_ls_tablet_iter_.skip_cur_ls(); // for any errno, skip cur ls
|
medium_ls_tablet_iter_.skip_cur_ls(); // for any errno, skip cur ls
|
||||||
tenant_merge_finish = false;
|
medium_ls_tablet_iter_.update_merge_finish(false);
|
||||||
if (OB_SIZE_OVERFLOW == tmp_ret) {
|
if (OB_SIZE_OVERFLOW == tmp_ret) {
|
||||||
break;
|
break;
|
||||||
} else if (!schedule_ignore_error(tmp_ret)) {
|
} else if (!schedule_ignore_error(tmp_ret)) {
|
||||||
LOG_WARN("failed to schedule ls merge", K(tmp_ret), KPC(ls));
|
LOG_WARN("failed to schedule ls merge", K(tmp_ret), KPC(ls));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tenant_merge_finish &= ls_merge_finish;
|
|
||||||
|
|
||||||
// loop tablet_meta table to update smaller report_scn because of migration
|
// loop tablet_meta table to update smaller report_scn because of migration
|
||||||
if (check_report_scn_flag) {
|
if (check_report_scn_flag) {
|
||||||
(void) update_report_scn_as_ls_leader(*ls);
|
(void) update_report_scn_as_ls_leader(*ls);
|
||||||
@ -1140,9 +1140,9 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
|||||||
}
|
}
|
||||||
} // end while
|
} // end while
|
||||||
|
|
||||||
if (!tenant_merge_finish) { // wait major compaction
|
if (!medium_ls_tablet_iter_.tenant_merge_finish()) { // wait major compaction
|
||||||
if (all_ls_weak_read_ts_ready) { // check schedule Timer Task
|
if (all_ls_weak_read_ts_ready) { // check schedule Timer Task
|
||||||
if (schedule_stats_.add_weak_read_ts_event_flag_) {
|
if (schedule_stats_.add_weak_read_ts_event_flag_ && medium_ls_tablet_iter_.is_scan_finish()) { // all ls scan finish
|
||||||
schedule_stats_.add_weak_read_ts_event_flag_ = false;
|
schedule_stats_.add_weak_read_ts_event_flag_ = false;
|
||||||
ADD_COMPACTION_EVENT(
|
ADD_COMPACTION_EVENT(
|
||||||
MTL_ID(),
|
MTL_ID(),
|
||||||
@ -1159,7 +1159,7 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (REACH_TENANT_TIME_INTERVAL(ADD_LOOP_EVENT_INTERVAL)) {
|
if (medium_ls_tablet_iter_.is_scan_finish() && REACH_TENANT_TIME_INTERVAL(ADD_LOOP_EVENT_INTERVAL)) {
|
||||||
ADD_COMPACTION_EVENT(
|
ADD_COMPACTION_EVENT(
|
||||||
MTL_ID(),
|
MTL_ID(),
|
||||||
MAJOR_MERGE,
|
MAJOR_MERGE,
|
||||||
@ -1171,7 +1171,7 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (OB_SUCC(ret) && tenant_merge_finish && merge_version > merged_version_) {
|
if (OB_SUCC(ret) && medium_ls_tablet_iter_.tenant_merge_finish() && merge_version > merged_version_) {
|
||||||
merged_version_ = merge_version;
|
merged_version_ = merge_version;
|
||||||
LOG_INFO("all tablet major merge finish", K(merged_version_), K(merge_version));
|
LOG_INFO("all tablet major merge finish", K(merged_version_), K(merge_version));
|
||||||
DEL_SUSPECT_INFO(MEDIUM_MERGE, share::ObLSID(INT64_MAX), ObTabletID(INT64_MAX));
|
DEL_SUSPECT_INFO(MEDIUM_MERGE, share::ObLSID(INT64_MAX), ObTabletID(INT64_MAX));
|
||||||
@ -1194,10 +1194,13 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium()
|
|||||||
reload_tenant_config(); // tenant merge finish, use tenant default config to loop
|
reload_tenant_config(); // tenant merge finish, use tenant default config to loop
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFO("finish schedule all tablet merge", K(merge_version), K(schedule_stats_), K(tenant_merge_finish),
|
LOG_INFO("finish schedule all tablet merge", K(merge_version), K(schedule_stats_),
|
||||||
|
"tenant_merge_finish", medium_ls_tablet_iter_.tenant_merge_finish(),
|
||||||
K(merged_version_), K(schedule_tablet_cnt));
|
K(merged_version_), K(schedule_tablet_cnt));
|
||||||
|
if (medium_ls_tablet_iter_.is_scan_finish()) {
|
||||||
schedule_stats_.clear_tablet_cnt();
|
schedule_stats_.clear_tablet_cnt();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1275,6 +1278,8 @@ int ObCompactionScheduleIterator::build_iter()
|
|||||||
ls_idx_ = -1;
|
ls_idx_ = -1;
|
||||||
tablet_idx_ = 0;
|
tablet_idx_ = 0;
|
||||||
tablet_ids_.reuse();
|
tablet_ids_.reuse();
|
||||||
|
scan_finish_ = false;
|
||||||
|
merge_finish_ = true;
|
||||||
LOG_TRACE("build iter", K(ret), K(ls_ids_));
|
LOG_TRACE("build iter", K(ret), K(ls_ids_));
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
@ -1290,6 +1295,7 @@ int ObCompactionScheduleIterator::get_next_ls(ObLSHandle &ls_handle)
|
|||||||
}
|
}
|
||||||
do {
|
do {
|
||||||
if (ls_idx_ >= ls_ids_.count()) {
|
if (ls_idx_ >= ls_ids_.count()) {
|
||||||
|
scan_finish_ = true;
|
||||||
ret = OB_ITER_END;
|
ret = OB_ITER_END;
|
||||||
} else if (OB_FAIL((MTL(storage::ObLSService *)->get_ls(ls_ids_[ls_idx_], ls_handle, mod_)))) {
|
} else if (OB_FAIL((MTL(storage::ObLSService *)->get_ls(ls_ids_[ls_idx_], ls_handle, mod_)))) {
|
||||||
if (OB_LS_NOT_EXIST == ret) {
|
if (OB_LS_NOT_EXIST == ret) {
|
||||||
@ -1311,6 +1317,8 @@ void ObCompactionScheduleIterator::reset()
|
|||||||
tablet_idx_ = 0;
|
tablet_idx_ = 0;
|
||||||
ls_ids_.reuse();
|
ls_ids_.reuse();
|
||||||
tablet_ids_.reuse();
|
tablet_ids_.reuse();
|
||||||
|
scan_finish_ = false;
|
||||||
|
merge_finish_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ObCompactionScheduleIterator::is_valid() const
|
bool ObCompactionScheduleIterator::is_valid() const
|
||||||
|
|||||||
@ -66,6 +66,8 @@ public:
|
|||||||
const int64_t timeout_us = ObTabletCommon::DIRECT_GET_COMMITTED_TABLET_TIMEOUT_US)
|
const int64_t timeout_us = ObTabletCommon::DIRECT_GET_COMMITTED_TABLET_TIMEOUT_US)
|
||||||
: mod_(mod),
|
: mod_(mod),
|
||||||
is_major_(is_major),
|
is_major_(is_major),
|
||||||
|
scan_finish_(false),
|
||||||
|
merge_finish_(false),
|
||||||
timeout_us_(timeout_us),
|
timeout_us_(timeout_us),
|
||||||
ls_idx_(0),
|
ls_idx_(0),
|
||||||
tablet_idx_(0),
|
tablet_idx_(0),
|
||||||
@ -76,6 +78,11 @@ public:
|
|||||||
int build_iter();
|
int build_iter();
|
||||||
int get_next_ls(ObLSHandle &ls_handle);
|
int get_next_ls(ObLSHandle &ls_handle);
|
||||||
int get_next_tablet(ObLSHandle &ls_handle, ObTabletHandle &tablet_handle);
|
int get_next_tablet(ObLSHandle &ls_handle, ObTabletHandle &tablet_handle);
|
||||||
|
bool is_scan_finish() const { return scan_finish_; }
|
||||||
|
bool tenant_merge_finish() const { return merge_finish_ & scan_finish_; }
|
||||||
|
void update_merge_finish(bool merge_finish) {
|
||||||
|
merge_finish_ &= merge_finish;
|
||||||
|
}
|
||||||
void reset();
|
void reset();
|
||||||
bool is_valid() const;
|
bool is_valid() const;
|
||||||
void skip_cur_ls()
|
void skip_cur_ls()
|
||||||
@ -89,6 +96,8 @@ private:
|
|||||||
static const int64_t TABLET_ID_ARRAY_CNT = 2000;
|
static const int64_t TABLET_ID_ARRAY_CNT = 2000;
|
||||||
ObLSGetMod mod_;
|
ObLSGetMod mod_;
|
||||||
bool is_major_;
|
bool is_major_;
|
||||||
|
bool scan_finish_;
|
||||||
|
bool merge_finish_;
|
||||||
int64_t timeout_us_;
|
int64_t timeout_us_;
|
||||||
int64_t ls_idx_;
|
int64_t ls_idx_;
|
||||||
uint64_t tablet_idx_;
|
uint64_t tablet_idx_;
|
||||||
@ -219,7 +228,6 @@ private:
|
|||||||
int schedule_ls_medium_merge(
|
int schedule_ls_medium_merge(
|
||||||
int64_t &merge_version,
|
int64_t &merge_version,
|
||||||
ObLSHandle &ls_handle,
|
ObLSHandle &ls_handle,
|
||||||
bool &ls_merge_finish,
|
|
||||||
bool &all_ls_weak_read_ts_ready,
|
bool &all_ls_weak_read_ts_ready,
|
||||||
int64_t &schedule_tablet_cnt);
|
int64_t &schedule_tablet_cnt);
|
||||||
int schedule_ls_minor_merge(
|
int schedule_ls_minor_merge(
|
||||||
|
|||||||
Reference in New Issue
Block a user