From 12e2837207edcf644a0aac32a7a72dd73c1cabee Mon Sep 17 00:00:00 2001 From: Fengjingkun Date: Fri, 16 Jun 2023 09:42:05 +0000 Subject: [PATCH] disable adaptive compaction when detecting high CPU load --- .../compaction/ob_partition_merge_policy.cpp | 72 ++++---- .../compaction/ob_partition_merge_policy.h | 25 +-- .../compaction/ob_tablet_merge_task.cpp | 2 + .../compaction/ob_tenant_tablet_scheduler.cpp | 15 +- src/storage/ob_tenant_tablet_stat_mgr.cpp | 165 +++++++++++++----- src/storage/ob_tenant_tablet_stat_mgr.h | 82 ++++++--- .../storage/test_tenant_tablet_stat_mgr.cpp | 16 +- 7 files changed, 252 insertions(+), 125 deletions(-) diff --git a/src/storage/compaction/ob_partition_merge_policy.cpp b/src/storage/compaction/ob_partition_merge_policy.cpp index 1033dc2b2e..1c054927e3 100644 --- a/src/storage/compaction/ob_partition_merge_policy.cpp +++ b/src/storage/compaction/ob_partition_merge_policy.cpp @@ -1428,32 +1428,37 @@ int ObAdaptiveMergePolicy::get_adaptive_merge_reason( int tmp_ret = OB_SUCCESS; const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; - ObTabletStat tablet_stat; - reason = AdaptiveMergeReason::NONE; - if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_latest_tablet_stat(ls_id, tablet_id, tablet_stat))) { + reason = AdaptiveMergeReason::NONE; + ObTabletStatAnalyzer tablet_analyzer; + + if (tablet_id.is_special_merge_tablet()) { + // do nothing + } else if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_tablet_analyzer(ls_id, tablet_id, tablet_analyzer))) { if (OB_HASH_NOT_EXIST != ret) { - LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id)); + LOG_WARN("failed to get tablet analyzer stat", K(ret), K(ls_id), K(tablet_id)); } else if (OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) { LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id)); + } else { + ret = OB_SUCCESS; } } else { - if 
(OB_TMP_FAIL(check_tombstone_situation(tablet_stat, tablet, reason))) { - LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id)); + if (OB_TMP_FAIL(check_tombstone_situation(tablet_analyzer, tablet, reason))) { + LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); } - if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_stat, tablet, reason))) { - LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id)); + if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_analyzer, tablet, reason))) { + LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); } if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) { - LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id)); + LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); } - if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_stat, tablet, reason))) { - LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id)); + if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_analyzer, tablet, reason))) { + LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); } + } - if (REACH_TENANT_TIME_INTERVAL(10 * 1000 * 1000 /*10s*/)) { - LOG_INFO("Check tablet adaptive merge reason", K(reason), K(tablet_stat)); // TODO tmp log, remove later - } + if (REACH_TENANT_TIME_INTERVAL(10 * 1000 * 1000 /*10s*/)) { + LOG_INFO("Check tablet adaptive merge reason", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_analyzer)); } return ret; } @@ -1489,7 +1494,7 @@ int ObAdaptiveMergePolicy::check_inc_sstable_row_cnt_percentage( } int 
ObAdaptiveMergePolicy::check_load_data_situation( - const ObTabletStat &tablet_stat, + const storage::ObTabletStatAnalyzer &analyzer, const ObTablet &tablet, AdaptiveMergeReason &reason) { @@ -1497,19 +1502,20 @@ int ObAdaptiveMergePolicy::check_load_data_situation( const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; reason = AdaptiveMergeReason::NONE; - if (!tablet.is_valid() || !tablet_stat.is_valid() - || ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) { + + if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid() + || ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat)); - } else if (tablet_stat.is_hot_tablet() && tablet_stat.is_insert_mostly()) { + LOG_WARN("get invalid arguments", K(ret), K(tablet), K(analyzer)); + } else if (analyzer.is_hot_tablet() && analyzer.is_insert_mostly()) { reason = AdaptiveMergeReason::LOAD_DATA_SCENE; } - LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat)); + LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer)); return ret; } int ObAdaptiveMergePolicy::check_tombstone_situation( - const ObTabletStat &tablet_stat, + const storage::ObTabletStatAnalyzer &analyzer, const ObTablet &tablet, AdaptiveMergeReason &reason) { @@ -1518,19 +1524,19 @@ int ObAdaptiveMergePolicy::check_tombstone_situation( const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; reason = AdaptiveMergeReason::NONE; - if (!tablet.is_valid() || !tablet_stat.is_valid() - || ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) { + if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid() + || ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != 
analyzer.tablet_stat_.tablet_id_)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat)); - } else if (tablet_stat.is_hot_tablet() && (tablet_stat.is_update_mostly() || tablet_stat.is_delete_mostly())) { + LOG_WARN("get invalid arguments", K(ret), K(analyzer), K(tablet)); + } else if (analyzer.tablet_stat_.merge_cnt_ > 1 && analyzer.is_update_or_delete_mostly()) { reason = AdaptiveMergeReason::TOMBSTONE_SCENE; } - LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat)); + LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer)); return ret; } int ObAdaptiveMergePolicy::check_ineffecient_read( - const ObTabletStat &tablet_stat, + const storage::ObTabletStatAnalyzer &analyzer, const ObTablet &tablet, AdaptiveMergeReason &reason) { @@ -1539,16 +1545,14 @@ int ObAdaptiveMergePolicy::check_ineffecient_read( const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; reason = AdaptiveMergeReason::NONE; - if (!tablet.is_valid() || !tablet_stat.is_valid() || - ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) { + if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid() + || ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat)); - } else if (!tablet_stat.is_hot_tablet()) { - } else if (tablet_stat.is_inefficient_scan() || tablet_stat.is_inefficient_insert() - || tablet_stat.is_inefficient_pushdown()) { + LOG_WARN("get invalid arguments", K(ret), K(tablet), K(analyzer)); + } else if (analyzer.is_hot_tablet() && analyzer.has_slow_query()) { reason = AdaptiveMergeReason::INEFFICIENT_QUERY; } - LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat)); + LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), 
K(reason), K(analyzer)); return ret; } diff --git a/src/storage/compaction/ob_partition_merge_policy.h b/src/storage/compaction/ob_partition_merge_policy.h index 4bbb04662d..443a6756a5 100644 --- a/src/storage/compaction/ob_partition_merge_policy.h +++ b/src/storage/compaction/ob_partition_merge_policy.h @@ -28,7 +28,7 @@ class ObTabletTableStore; class ObGetMergeTablesResult; class ObTablesHandleArray; class ObStorageSchema; -struct ObTabletStat; +struct ObTabletStatAnalyzer; struct ObTableHandleV2; class ObLS; class ObTableStoreIterator; @@ -242,15 +242,18 @@ private: storage::ObGetMergeTablesResult &result, const bool update_snapshot_flag); private: - static int check_load_data_situation(const storage::ObTabletStat &tablet_stat, - const storage::ObTablet &tablet, - AdaptiveMergeReason &merge_reason); - static int check_tombstone_situation(const storage::ObTabletStat &tablet_stat, - const storage::ObTablet &tablet, - AdaptiveMergeReason &merge_reason); - static int check_ineffecient_read(const storage::ObTabletStat &tablet_stat, - const storage::ObTablet &tablet, - AdaptiveMergeReason &merge_reason); + static int check_load_data_situation( + const storage::ObTabletStatAnalyzer &analyzer, + const storage::ObTablet &tablet, + AdaptiveMergeReason &merge_reason); + static int check_tombstone_situation( + const storage::ObTabletStatAnalyzer &analyzer, + const storage::ObTablet &tablet, + AdaptiveMergeReason &merge_reason); + static int check_ineffecient_read( + const storage::ObTabletStatAnalyzer &analyzer, + const storage::ObTablet &tablet, + AdaptiveMergeReason &merge_reason); static int check_inc_sstable_row_cnt_percentage( const ObTablet &tablet, AdaptiveMergeReason &merge_reason); @@ -263,7 +266,7 @@ private: static constexpr int64_t LOAD_DATA_SCENE_THRESHOLD = 70; static constexpr int64_t TOMBSTONE_SCENE_THRESHOLD = 50; static constexpr float INC_ROW_COUNT_PERCENTAGE_THRESHOLD = 0.5; - static constexpr int64_t TRANS_STATE_DETERM_ROW_CNT_THRESHOLD = 1000L; // 1k 
+ static constexpr int64_t TRANS_STATE_DETERM_ROW_CNT_THRESHOLD = 10000L; // 10k }; diff --git a/src/storage/compaction/ob_tablet_merge_task.cpp b/src/storage/compaction/ob_tablet_merge_task.cpp index b35ff2eb5b..eb933576eb 100644 --- a/src/storage/compaction/ob_tablet_merge_task.cpp +++ b/src/storage/compaction/ob_tablet_merge_task.cpp @@ -38,6 +38,7 @@ #include "storage/compaction/ob_tenant_tablet_scheduler.h" #include "share/ob_get_compat_mode.h" #include "share/ob_tablet_meta_table_compaction_operator.h" +#include "share/resource_manager/ob_cgroup_ctrl.h" namespace oceanbase { @@ -1297,6 +1298,7 @@ int ObTabletMergeFinishTask::try_schedule_compaction_after_mini( int tmp_ret = OB_SUCCESS; const ObTabletID &tablet_id = ctx.param_.tablet_id_; ObLSID ls_id = ctx.param_.ls_id_; + // report tablet stat if (0 == ctx.get_merge_info().get_sstable_merge_info().macro_block_count_) { // empty mini compaction, no need to reprot stat diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp index 8771fce3aa..7e66bba6e3 100755 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp @@ -985,6 +985,17 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge( DEL_SUSPECT_INFO(MEDIUM_MERGE, ls_id, ObTabletID(INT64_MAX)); } + bool enable_adaptive_compaction = enable_adaptive_compaction_; + ObTenantSysStat cur_sys_stat; + if (!enable_adaptive_compaction_) { + // do nothing + } else if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_sys_stat(cur_sys_stat))) { + LOG_WARN("failed to get tenant sys stat", K(tmp_ret), K(cur_sys_stat)); + } else if (cur_sys_stat.is_full_cpu_usage()) { + enable_adaptive_compaction = false; + FLOG_INFO("disable adaptive compaction due to the high load CPU", K(ret), K(cur_sys_stat)); + } + while (OB_SUCC(ret) && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) { // loop all tablet in ls bool tablet_merge_finish = false; if 
(OB_FAIL(medium_ls_tablet_iter_.get_next_tablet(ls_handle, tablet_handle))) { @@ -1034,14 +1045,14 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge( } else if (ObTimeUtility::fast_current_time() * 1000 < tablet->get_medium_compaction_info_list().get_wait_check_medium_scn() + WAIT_MEDIUM_CHECK_THRESHOLD) { // need wait 10 mins before schedule meta major - } else if (enable_adaptive_compaction_ && OB_TMP_FAIL(schedule_tablet_meta_major_merge(ls_handle, new_handle))) { + } else if (enable_adaptive_compaction && OB_TMP_FAIL(schedule_tablet_meta_major_merge(ls_handle, new_handle))) { if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) { LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_id)); } } } if (could_schedule_next_medium && could_major_merge - && (!tablet_merge_finish || enable_adaptive_compaction_ || check_medium_finish) + && (!tablet_merge_finish || enable_adaptive_compaction || check_medium_finish) && OB_TMP_FAIL(func.schedule_next_medium_for_leader( tablet_merge_finish ? 
0 : merge_version, schedule_stats_))) { // schedule another round LOG_WARN("failed to schedule next medium", K(tmp_ret), K(ls_id), K(tablet_id)); diff --git a/src/storage/ob_tenant_tablet_stat_mgr.cpp b/src/storage/ob_tenant_tablet_stat_mgr.cpp index 8e81fabdc8..36ebee6664 100644 --- a/src/storage/ob_tenant_tablet_stat_mgr.cpp +++ b/src/storage/ob_tenant_tablet_stat_mgr.cpp @@ -9,6 +9,8 @@ #include "share/ob_force_print_log.h" #include "share/ob_thread_mgr.h" #include "storage/ob_tenant_tablet_stat_mgr.h" +#include "observer/ob_server_struct.h" +#include "observer/ob_server.h" using namespace oceanbase; using namespace oceanbase::common; @@ -105,17 +107,28 @@ bool ObTabletStat::is_valid() const bool ObTabletStat::check_need_report() const { bool bret = false; + ObTabletID tablet_id(tablet_id_); - if (0 != query_cnt_) { // report by query - if (QUERY_REPORT_MIN_ROW_CNT <= scan_physical_row_cnt_ || - QUERY_REPORT_MIN_MICRO_BLOCK_CNT <= scan_micro_block_cnt_ || - QUERY_REPORT_MIN_SCAN_TABLE_CNT <= exist_row_total_table_cnt_) { + if (tablet_id.is_ls_inner_tablet()) { + // do nothing + } else if (0 < merge_cnt_) { // report by compaction + bret = get_total_merge_row_count() >= MERGE_REPORT_MIN_ROW_CNT; + } else if (0 < query_cnt_) { // only report the slow query + const int64_t boost_factor = tablet_id.is_inner_tablet() ? 
2 : 1; + if (scan_physical_row_cnt_ > 0 && + scan_physical_row_cnt_ >= scan_logical_row_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) { + bret = true; + } + + if (!bret && scan_micro_block_cnt_ > 0 && + scan_micro_block_cnt_ >= pushdown_micro_block_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) { + bret = true; + } + + if (!bret && exist_row_total_table_cnt_ > 0 && + exist_row_total_table_cnt_ >= exist_row_read_table_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) { bret = true; } - } else if (0 != merge_cnt_) { // report by compaction - bret = MERGE_REPORT_MIN_ROW_CNT <= insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_; - } else { // invalid tablet stat - bret = false; } return bret; } @@ -166,74 +179,96 @@ ObTabletStat& ObTabletStat::archive(int64_t factor) return *this; } -bool ObTabletStat::is_hot_tablet() const + +/************************************* ObTabletStatAnalyzer *************************************/ +bool ObTabletStatAnalyzer::is_hot_tablet() const { - return query_cnt_ + merge_cnt_ >= ACCESS_FREQUENCY; + return tablet_stat_.query_cnt_ + tablet_stat_.merge_cnt_ >= ACCESS_FREQUENCY * boost_factor_; } -bool ObTabletStat::is_insert_mostly() const +bool ObTabletStatAnalyzer::is_insert_mostly() const { bool bret = false; - uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_; - if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) { + ObTabletID tablet_id(tablet_stat_.tablet_id_); + uint64_t total_row_cnt = tablet_stat_.get_total_merge_row_count(); + + if (tablet_id.is_inner_tablet() || tablet_id.is_ls_inner_tablet()) { + // do nothing + } else if (0 == tablet_stat_.insert_row_cnt_) { + // no insert occurs + } else if (total_row_cnt < MERGE_BASIC_ROW_CNT * boost_factor_) { // do nothing } else { - bret = insert_row_cnt_ * BASE_FACTOR / total_row_cnt >= INSERT_PIVOT_FACTOR; + bret = total_row_cnt * LOAD_THRESHOLD <= tablet_stat_.insert_row_cnt_ * BASE_FACTOR; } return bret; } -bool 
ObTabletStat::is_update_mostly() const +bool ObTabletStatAnalyzer::is_update_or_delete_mostly() const { bool bret = false; - uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_; - if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) { + uint64_t total_row_cnt = tablet_stat_.get_total_merge_row_count(); + + if (0 == tablet_stat_.delete_row_cnt_ + tablet_stat_.update_row_cnt_) { + // no update && delete occurs + } else if (total_row_cnt < MERGE_BASIC_ROW_CNT * boost_factor_) { // do nothing } else { - bret = update_row_cnt_ * BASE_FACTOR / total_row_cnt >= UPDATE_PIVOT_FACTOR; + bret = total_row_cnt * TOMBSTONE_THRESHOLD * boost_factor_ <= (tablet_stat_.update_row_cnt_ + tablet_stat_.delete_row_cnt_) * BASE_FACTOR; } return bret; } -bool ObTabletStat::is_delete_mostly() const +bool ObTabletStatAnalyzer::has_slow_query() const { bool bret = false; - uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_; - if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) { - // do nothing - } else { - bret = delete_row_cnt_ * BASE_FACTOR / total_row_cnt >= DELETE_PIVOT_FACTOR; + // all tablet query stats are ineffecient, only check the basic threshold + if (tablet_stat_.scan_physical_row_cnt_ >= QUERY_BASIC_ROW_CNT * boost_factor_ || + tablet_stat_.scan_micro_block_cnt_ >= QUERY_BASIC_MICRO_BLOCK_CNT * boost_factor_ || + tablet_stat_.exist_row_total_table_cnt_ >= QUERY_BASIC_ITER_TABLE_CNT * boost_factor_) { + bret = true; } return bret; } -bool ObTabletStat::is_inefficient_scan() const +/************************************* ObTenantSysStat *************************************/ +ObTenantSysStat::ObTenantSysStat() + : cpu_usage_percentage_(0), + min_cpu_cnt_(0), + max_cpu_cnt_(0), + memory_hold_(0), + memory_limit_(0) +{ +} + +void ObTenantSysStat::reset() +{ + cpu_usage_percentage_ = 0; + min_cpu_cnt_ = 0; + max_cpu_cnt_ = 0; + memory_hold_ = 0; + memory_limit_ = 0; +} + +bool ObTenantSysStat::is_small_tenant() const { bool bret = false; - if (0 == 
scan_logical_row_cnt_ || scan_logical_row_cnt_ < BASIC_ROW_CNT_THRESHOLD) { - } else { - bret = scan_physical_row_cnt_ / scan_logical_row_cnt_ >= SCAN_READ_FACTOR; - } + // 8c16g + const int64_t cpu_threshold = 8; + // When the tenant memory exceeds 10GB, the meta tenant occupies at least 10% of the memory. + const int64_t mem_threshold = (16L << 30) * 9 / 10; + bret = max_cpu_cnt_ < cpu_threshold || memory_limit_ < mem_threshold; return bret; } -bool ObTabletStat::is_inefficient_insert() const +bool ObTenantSysStat::is_full_cpu_usage() const { bool bret = false; - if (0 == exist_row_total_table_cnt_ || exist_row_total_table_cnt_ < BASIC_TABLE_CNT_THRESHOLD) { + if (is_small_tenant()) { + bret = max_cpu_cnt_ * 60 <= cpu_usage_percentage_; } else { - bret = exist_row_read_table_cnt_ * BASE_FACTOR / exist_row_total_table_cnt_ >= EXIST_READ_FACTOR; - } - return bret; -} - -bool ObTabletStat::is_inefficient_pushdown() const -{ - bool bret = false; - if (0 == scan_micro_block_cnt_ || scan_micro_block_cnt_ < BASIC_MICRO_BLOCK_CNT_THRESHOLD) { - } else { - bret = pushdown_micro_block_cnt_ < scan_micro_block_cnt_ / SCAN_READ_FACTOR; + bret = max_cpu_cnt_ * 70 <= cpu_usage_percentage_; } return bret; } @@ -648,6 +683,44 @@ int ObTenantTabletStatMgr::get_history_tablet_stats( return ret; } +int ObTenantTabletStatMgr::get_tablet_analyzer( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + ObTabletStatAnalyzer &analyzer) +{ + int ret = OB_SUCCESS; + ObTenantSysStat sys_stat; + + if (OB_FAIL(get_latest_tablet_stat(ls_id, tablet_id, analyzer.tablet_stat_))) { + LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(get_sys_stat(sys_stat))) { + LOG_WARN("failed to get sys stat", K(ret)); + } else { + analyzer.is_small_tenant_ = sys_stat.is_small_tenant(); + analyzer.boost_factor_ = analyzer.is_small_tenant_ ? 
2 : 1; + } + return ret; +} + +int ObTenantTabletStatMgr::get_sys_stat(ObTenantSysStat &sys_stat) +{ + int ret = OB_SUCCESS; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantTabletStatMgr not inited", K(ret)); + } else if (OB_FAIL(GCTX.omt_->get_tenant_cpu_usage(MTL_ID(), sys_stat.cpu_usage_percentage_))) { + LOG_WARN("failed to get tenant cpu usage", K(ret), K(sys_stat)); + } else if (OB_FAIL(GCTX.omt_->get_tenant_cpu(MTL_ID(), sys_stat.min_cpu_cnt_, sys_stat.max_cpu_cnt_))) { + LOG_WARN("failed to get tenant cpu count", K(ret), K(sys_stat)); + } else { + sys_stat.memory_hold_ = lib::get_tenant_memory_hold(MTL_ID()); + sys_stat.memory_limit_ = lib::get_tenant_memory_limit(MTL_ID()); + sys_stat.cpu_usage_percentage_ *= 100; + } + return ret; +} + int ObTenantTabletStatMgr::update_tablet_stream(const ObTabletStat &report_stat) { int ret = OB_SUCCESS; @@ -723,7 +796,7 @@ int ObTenantTabletStatMgr::fetch_node(ObTabletStreamNode *&node) void ObTenantTabletStatMgr::process_stats() { - int tmp_ret = OB_SUCCESS; + int ret = OB_SUCCESS; const uint64_t start_idx = report_cursor_; const uint64_t pending_cur = ATOMIC_LOAD(&pending_cursor_); uint64_t end_idx = (pending_cur > start_idx + DEFAULT_MAX_PENDING_CNT) @@ -734,10 +807,10 @@ void ObTenantTabletStatMgr::process_stats() } else { for (uint64_t i = start_idx; i < end_idx; ++i) { const ObTabletStat &cur_stat = report_queue_[i % DEFAULT_MAX_PENDING_CNT]; - if (!cur_stat.is_valid()) { + if (OB_UNLIKELY(!cur_stat.is_valid())) { // allow dirty read - } else if (OB_TMP_FAIL(update_tablet_stream(cur_stat))) { - LOG_WARN_RET(tmp_ret, "failed to update tablet stat", K(tmp_ret), K(cur_stat)); + } else if (OB_FAIL(update_tablet_stream(cur_stat))) { + LOG_WARN_RET(ret, "failed to update tablet stat", K(ret), K(cur_stat)); } } report_cursor_ = pending_cur; // only TabletStatUpdater update this value. 
diff --git a/src/storage/ob_tenant_tablet_stat_mgr.h b/src/storage/ob_tenant_tablet_stat_mgr.h index 601634c606..6b1dea6f39 100644 --- a/src/storage/ob_tenant_tablet_stat_mgr.h +++ b/src/storage/ob_tenant_tablet_stat_mgr.h @@ -14,6 +14,7 @@ #include "lib/allocator/page_arena.h" #include "lib/allocator/ob_fifo_allocator.h" #include "lib/lock/ob_bucket_lock.h" +#include "lib/lock/ob_tc_rwlock.h" #include "lib/queue/ob_fixed_queue.h" #include "lib/list/ob_dlist.h" @@ -69,36 +70,18 @@ public: void reset() { MEMSET(this, 0, sizeof(ObTabletStat)); } bool is_valid() const; bool check_need_report() const; + int64_t get_total_merge_row_count() const { return insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_; } ObTabletStat& operator=(const ObTabletStat &other); ObTabletStat& operator+=(const ObTabletStat &other); ObTabletStat& archive(int64_t factor); - bool is_hot_tablet() const; - bool is_insert_mostly() const; - bool is_update_mostly() const; - bool is_delete_mostly() const; - bool is_inefficient_scan() const; - bool is_inefficient_insert() const; - bool is_inefficient_pushdown() const; TO_STRING_KV(K_(ls_id), K_(tablet_id), K_(query_cnt), K_(merge_cnt), K_(scan_logical_row_cnt), K_(scan_physical_row_cnt), K_(scan_micro_block_cnt), K_(pushdown_micro_block_cnt), K_(exist_row_total_table_cnt), K_(exist_row_read_table_cnt), K_(insert_row_cnt), K_(update_row_cnt), K_(delete_row_cnt)); public: - static constexpr int64_t ACCESS_FREQUENCY = 5; - static constexpr int64_t BASE_FACTOR = 10; - static constexpr int64_t INSERT_PIVOT_FACTOR = 5; - static constexpr int64_t UPDATE_PIVOT_FACTOR = 4; - static constexpr int64_t DELETE_PIVOT_FACTOR = 3; - static constexpr int64_t SCAN_READ_FACTOR = 2; - static constexpr int64_t EXIST_READ_FACTOR = 7; - static constexpr int64_t BASIC_TABLE_CNT_THRESHOLD = 5; - static constexpr int64_t BASIC_MICRO_BLOCK_CNT_THRESHOLD = 16; - static constexpr int64_t BASIC_ROW_CNT_THRESHOLD = 10000; // TODO(@Danling) make it a comfiguration item - static 
constexpr int64_t QUERY_REPORT_MIN_ROW_CNT = 100; - static constexpr int64_t QUERY_REPORT_MIN_MICRO_BLOCK_CNT = 10; - static constexpr int64_t QUERY_REPORT_MIN_SCAN_TABLE_CNT = 2; - static constexpr int64_t MERGE_REPORT_MIN_ROW_CNT = 100; + static constexpr int64_t QUERY_REPORT_INEFFICIENT_THRESHOLD = 3; + static constexpr int64_t MERGE_REPORT_MIN_ROW_CNT = 1000; public: int64_t ls_id_; uint64_t tablet_id_; @@ -116,6 +99,52 @@ public: }; +struct ObTabletStatAnalyzer +{ +public: + ObTabletStatAnalyzer() = default; + ~ObTabletStatAnalyzer() = default; + bool is_hot_tablet() const; + bool is_insert_mostly() const; + bool is_update_or_delete_mostly() const; + bool has_slow_query() const; + TO_STRING_KV(K_(tablet_stat), K_(is_small_tenant), K_(boost_factor)); +public: + static constexpr int64_t ACCESS_FREQUENCY = 5; + static constexpr int64_t BASE_FACTOR = 10; + static constexpr int64_t LOAD_THRESHOLD = 7; + static constexpr int64_t TOMBSTONE_THRESHOLD = 3; + static constexpr int64_t QUERY_BASIC_ROW_CNT = 1000; + static constexpr int64_t QUERY_BASIC_MICRO_BLOCK_CNT = 10; + static constexpr int64_t QUERY_BASIC_ITER_TABLE_CNT = 5; + static constexpr int64_t MERGE_BASIC_ROW_CNT = 10000; +public: + ObTabletStat tablet_stat_; + int64_t boost_factor_; + bool is_small_tenant_; +}; + + +struct ObTenantSysStat +{ +public: + ObTenantSysStat(); + ~ObTenantSysStat() = default; + void reset(); + bool is_small_tenant() const; + bool is_full_cpu_usage() const; + TO_STRING_KV(K_(cpu_usage_percentage), K_(min_cpu_cnt), K_(max_cpu_cnt), K_(memory_hold), K_(memory_limit)); + +public: + static constexpr double EPS = 1e-9; + double cpu_usage_percentage_; + double min_cpu_cnt_; + double max_cpu_cnt_; + int64_t memory_hold_; + int64_t memory_limit_; +}; + + template class ObTabletStatBucket { @@ -302,6 +331,11 @@ public: const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, common::ObIArray &tablet_stats); + int get_tablet_analyzer( + const share::ObLSID &ls_id, + const 
common::ObTabletID &tablet_id, + ObTabletStatAnalyzer &analyzer); + int get_sys_stat(ObTenantSysStat &sys_stat); void process_stats(); void refresh_all(const int64_t step); private: @@ -326,18 +360,18 @@ private: static constexpr int64_t TABLET_STAT_PROCESS_INTERVAL = 5 * 1000L * 1000L; //5s static constexpr int64_t CHECK_INTERVAL = 120L * 1000L * 1000L; //120s static constexpr int64_t CHECK_RUNNING_TIME_INTERVAL = 120L * 1000L * 1000L; //120s - static constexpr int64_t DUMP_TABLET_STAT_INTERVAL = 60 * 1000LL * 1000LL; //60s + static constexpr int64_t CHECK_SYS_STAT_INTERVAL = 10 * 1000LL * 1000LL; //10s static constexpr int32_t DEFAULT_MAX_FREE_STREAM_CNT = 10000; static constexpr int32_t DEFAULT_UP_LIMIT_STREAM_CNT = 20000; static constexpr int32_t DEFAULT_BUCKET_NUM = 1000; - static constexpr int32_t DEFAULT_MAX_PENDING_CNT = 20000; + static constexpr int32_t DEFAULT_MAX_PENDING_CNT = 40000; static constexpr int32_t MAX_REPORT_RETRY_CNT = 5; TabletStatUpdater report_stat_task_; ObTabletStreamPool stream_pool_; TabletStreamMap stream_map_; common::ObBucketLock bucket_lock_; - ObTabletStat report_queue_[DEFAULT_MAX_PENDING_CNT]; + ObTabletStat report_queue_[DEFAULT_MAX_PENDING_CNT]; // 12 * 8 * 40000 bytes uint64_t report_cursor_; uint64_t pending_cursor_; int report_tg_id_; diff --git a/unittest/storage/test_tenant_tablet_stat_mgr.cpp b/unittest/storage/test_tenant_tablet_stat_mgr.cpp index f7d3d92900..a02f544967 100644 --- a/unittest/storage/test_tenant_tablet_stat_mgr.cpp +++ b/unittest/storage/test_tenant_tablet_stat_mgr.cpp @@ -113,7 +113,7 @@ void TestTenantTabletStatMgr::batch_report_stat(int64_t report_num) for (int64_t i = 0; i < report_num; ++i) { ObTabletStat curr_stat; curr_stat.ls_id_ = 1; - curr_stat.tablet_id_ = 10001 + i; + curr_stat.tablet_id_ = 300001 + i; curr_stat.query_cnt_ = 100 * (i + 1); curr_stat.scan_physical_row_cnt_ = 10000 + i; @@ -201,10 +201,10 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stream) { ObTabletStat tablet_stat; 
tablet_stat.ls_id_ = 1; - tablet_stat.tablet_id_ = 1; + tablet_stat.tablet_id_ = 200123; tablet_stat.query_cnt_ = 100; - tablet_stat.scan_logical_row_cnt_ = 100; - tablet_stat.scan_physical_row_cnt_ = 100; + tablet_stat.scan_logical_row_cnt_ = 1000000; + tablet_stat.scan_physical_row_cnt_ = 1000000; ObTabletStream stream; auto &curr_buckets = stream.curr_buckets_; @@ -380,10 +380,10 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_mgr) ObTabletStat tablet_stat; tablet_stat.ls_id_ = 1; - tablet_stat.tablet_id_ = 123; + tablet_stat.tablet_id_ = 200123; tablet_stat.query_cnt_ = 100; - tablet_stat.scan_logical_row_cnt_ = 100; - tablet_stat.scan_physical_row_cnt_ = 100; + tablet_stat.scan_logical_row_cnt_ = 100000; + tablet_stat.scan_physical_row_cnt_ = 1000000; bool report_succ = false; ret = stat_mgr_->report_stat(tablet_stat, report_succ); @@ -392,7 +392,7 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_mgr) ObTabletStat res; share::ObLSID ls_id(1); - common::ObTabletID tablet_id(123); + common::ObTabletID tablet_id(200123); ret = stat_mgr_->get_latest_tablet_stat(ls_id, tablet_id, res); ASSERT_EQ(OB_SUCCESS, ret); ASSERT_EQ(100, res.query_cnt_);