diff --git a/src/share/scheduler/ob_dag_scheduler.cpp b/src/share/scheduler/ob_dag_scheduler.cpp index a7df7a1558..d0226cf66e 100644 --- a/src/share/scheduler/ob_dag_scheduler.cpp +++ b/src/share/scheduler/ob_dag_scheduler.cpp @@ -2166,6 +2166,7 @@ int ObTenantDagScheduler::diagnose_minor_exe_dag( ObThreadCondGuard guard(scheduler_sync_); ObIDag *head = dag_list_[READY_DAG_LIST].get_head(ObDagPrio::DAG_PRIO_COMPACTION_MID); ObIDag *cur = head->get_next(); + bool find = false; while (head != cur && OB_SUCC(ret)) { if (cur->get_type() == ObDagType::DAG_TYPE_MERGE_EXECUTE) { compaction::ObTabletMergeExecuteDag *exe_dag = static_cast<compaction::ObTabletMergeExecuteDag *>(cur); @@ -2173,12 +2174,16 @@ int ObTenantDagScheduler::diagnose_minor_exe_dag( if (OB_FAIL(exe_dag->diagnose_compaction_info(progress))) { LOG_WARN("failed to diagnose compaction dag", K(ret), K(exe_dag)); } else { + find = true; break; } } } cur = cur->get_next(); } // end of while + if (OB_SUCC(ret) && !find) { + ret = OB_HASH_NOT_EXIST; + } } return ret; } diff --git a/src/storage/compaction/ob_compaction_diagnose.cpp b/src/storage/compaction/ob_compaction_diagnose.cpp index 46bcd6a883..2f7539ad54 100644 --- a/src/storage/compaction/ob_compaction_diagnose.cpp +++ b/src/storage/compaction/ob_compaction_diagnose.cpp @@ -402,6 +402,22 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet() } } (void)diagnose_medium_scn_table(compaction_scn); + // check tenant suspect info + if (diagnose_major_flag) { + ObScheduleSuspectInfo ret_info; + if (OB_SUCC(get_suspect_info(MEDIUM_MERGE, share::ObLSID(INT64_MAX), ObTabletID(INT64_MAX), ret_info)) + && can_add_diagnose_info()) { + SET_DIAGNOSE_INFO( + info_array_[idx_++], + MEDIUM_MERGE, + ret_info.tenant_id_, + share::ObLSID(INT64_MAX), + ObTabletID(INT64_MAX), + ObCompactionDiagnoseInfo::DIA_STATUS_FAILED, + ret_info.add_time_, + "schedule_suspect_info", ret_info.suspect_info_); + } + } while (OB_SUCC(ret)) { // loop all log_stream bool need_merge = false; @@ -546,7 +562,14 @@ int 
ObCompactionDiagnoseMgr::diagnose_tablet_mini_merge( int ObCompactionDiagnoseMgr::diagnose_tablet_minor_merge(const ObLSID &ls_id, ObTablet &tablet) { int ret = OB_SUCCESS; - if (tablet.get_table_store().get_minor_sstables().count() >= DIAGNOSE_TABLE_CNT_IN_STORAGE) { + int64_t minor_compact_trigger = ObPartitionMergePolicy::DEFAULT_MINOR_COMPACT_TRIGGER; + { + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); + if (tenant_config.is_valid()) { + minor_compact_trigger = tenant_config->minor_compact_trigger; + } + } + if (tablet.get_table_store().get_minor_sstables().count() >= minor_compact_trigger) { ObTabletMergeExecuteDag dag; if (OB_FAIL(diagnose_tablet_merge( dag, @@ -690,6 +713,8 @@ int ObCompactionDiagnoseMgr::diagnose_tablet_merge( LOG_WARN("failed to add diagnose info", K(ret), K(ls_id), K(tablet_id), K(progress)); } } + } else if (OB_FAIL(diagnose_no_dag(dag, merge_type, ls_id, tablet_id, compaction_scn))) { + LOG_WARN("failed to dagnose no dag", K(ret), K(ls_id), K(tablet_id)); } return ret; } diff --git a/src/storage/compaction/ob_partition_merge_progress.cpp b/src/storage/compaction/ob_partition_merge_progress.cpp index 0aa4511bf2..726798bf05 100644 --- a/src/storage/compaction/ob_partition_merge_progress.cpp +++ b/src/storage/compaction/ob_partition_merge_progress.cpp @@ -298,16 +298,27 @@ int ObPartitionMergeProgress::update_merge_info(ObSSTableMergeInfo &merge_info) void ObPartitionMergeProgress::update_estimated_finish_time_() { + int tmp_ret = OB_SUCCESS; int64_t current_time = ObTimeUtility::fast_current_time(); + int64_t start_time = current_time; if (0 == pre_scanned_row_cnt_) { // first time to init merge_progress int64_t spend_time = estimate_occupy_size_ / common::OB_DEFAULT_MACRO_BLOCK_SIZE * ObCompactionProgress::MERGE_SPEED + ObCompactionProgress::EXTRA_TIME; - estimated_finish_time_ = spend_time + current_time + UPDATE_INTERVAL; + estimated_finish_time_ = spend_time + start_time + UPDATE_INTERVAL; } else { + start_time = 
merge_dag_->get_start_time(); int64_t delta_row_cnt = estimate_row_cnt_ - pre_scanned_row_cnt_; - int64_t rest_time = MAX(1, delta_row_cnt) * (current_time - merge_dag_->get_start_time()) / pre_scanned_row_cnt_; + int64_t rest_time = MAX(1, delta_row_cnt) * (current_time - start_time) / pre_scanned_row_cnt_; estimated_finish_time_ = MAX(estimated_finish_time_, current_time + rest_time + UPDATE_INTERVAL); } + if (estimated_finish_time_ - start_time >= MAX_ESTIMATE_SPEND_TIME) { + if (REACH_TENANT_TIME_INTERVAL(PRINT_ESTIMATE_WARN_INTERVAL)) { + tmp_ret = OB_ERR_UNEXPECTED; + LOG_WARN_RET(tmp_ret, "estimated finish time is too large", K(tmp_ret), K_(estimate_occupy_size), + K(start_time), K(current_time), K_(pre_scanned_row_cnt), K_(estimate_row_cnt), K_(estimated_finish_time)); + } + estimated_finish_time_ = start_time + MAX_ESTIMATE_SPEND_TIME; + } } // called by ObTabletMergeFinishTask::process() diff --git a/src/storage/compaction/ob_partition_merge_progress.h b/src/storage/compaction/ob_partition_merge_progress.h index 0624f4f77f..4baec1ecb2 100644 --- a/src/storage/compaction/ob_partition_merge_progress.h +++ b/src/storage/compaction/ob_partition_merge_progress.h @@ -52,6 +52,8 @@ public: static const int32_t NORMAL_UPDATE_PARAM = 300; static const int32_t DEFAULT_ROW_CNT_PER_MACRO_BLOCK = 1000; static const int32_t DEFAULT_INCREMENT_ROW_FACTOR = 10; + static const int64_t MAX_ESTIMATE_SPEND_TIME = 24 * 60 * 60 * 1000 * 1000l; // 24 hours + static const int64_t PRINT_ESTIMATE_WARN_INTERVAL = 5 * 60 * 1000 * 1000; // 5 min protected: int estimate(ObTabletMergeCtx *ctx); void update_estimated_finish_time_(); diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp index bdee6e6dde..400a12c65c 100755 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp @@ -1042,6 +1042,9 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium() 
// do nothing, should not loop tablets if (REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) { LOG_INFO("compat_version is smaller than DATA_VERSION_4_1_0_0, cannot schedule medium", K(compat_version)); + ADD_SUSPECT_INFO(MEDIUM_MERGE, share::ObLSID(INT64_MAX), ObTabletID(INT64_MAX), + "invalid data version to schedule all tablets medium", + K(compat_version), "DATA_VERSION_4_1_0_0", DATA_VERSION_4_1_0_0); } } else if (OB_FAIL(MTL(ObLSService *)->get_ls_iter(ls_iter_guard, ObLSGetMod::STORAGE_MOD))) { LOG_WARN("failed to get ls iterator", K(ret)); @@ -1132,7 +1135,7 @@ int ObTenantTabletScheduler::schedule_all_tablets_medium() if (OB_SUCC(ret) && tenant_merge_finish && merge_version > merged_version_) { merged_version_ = merge_version; LOG_INFO("all tablet major merge finish", K(merged_version_), K(merge_version)); - + DEL_SUSPECT_INFO(MEDIUM_MERGE, share::ObLSID(INT64_MAX), ObTabletID(INT64_MAX)); if (OB_TMP_FAIL(MTL(ObTenantCompactionProgressMgr *)->update_progress( merge_version, share::ObIDag::DAG_STATUS_FINISH))) {