Disable adaptive compaction when high CPU load is detected
This commit is contained in:
@ -1428,32 +1428,37 @@ int ObAdaptiveMergePolicy::get_adaptive_merge_reason(
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
|
||||
const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
|
||||
ObTabletStat tablet_stat;
|
||||
reason = AdaptiveMergeReason::NONE;
|
||||
|
||||
if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_latest_tablet_stat(ls_id, tablet_id, tablet_stat))) {
|
||||
reason = AdaptiveMergeReason::NONE;
|
||||
ObTabletStatAnalyzer tablet_analyzer;
|
||||
|
||||
if (tablet_id.is_special_merge_tablet()) {
|
||||
// do nothing
|
||||
} else if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_tablet_analyzer(ls_id, tablet_id, tablet_analyzer))) {
|
||||
if (OB_HASH_NOT_EXIST != ret) {
|
||||
LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id));
|
||||
LOG_WARN("failed to get tablet analyzer stat", K(ret), K(ls_id), K(tablet_id));
|
||||
} else if (OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) {
|
||||
LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||
} else {
|
||||
ret = OB_SUCCESS;
|
||||
}
|
||||
} else {
|
||||
if (OB_TMP_FAIL(check_tombstone_situation(tablet_stat, tablet, reason))) {
|
||||
LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||
if (OB_TMP_FAIL(check_tombstone_situation(tablet_analyzer, tablet, reason))) {
|
||||
LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer));
|
||||
}
|
||||
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_stat, tablet, reason))) {
|
||||
LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_analyzer, tablet, reason))) {
|
||||
LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer));
|
||||
}
|
||||
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) {
|
||||
LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||
LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer));
|
||||
}
|
||||
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_analyzer, tablet, reason))) {
|
||||
LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer));
|
||||
}
|
||||
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_stat, tablet, reason))) {
|
||||
LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||
}
|
||||
|
||||
if (REACH_TENANT_TIME_INTERVAL(10 * 1000 * 1000 /*10s*/)) {
|
||||
LOG_INFO("Check tablet adaptive merge reason", K(reason), K(tablet_stat)); // TODO tmp log, remove later
|
||||
}
|
||||
LOG_INFO("Check tablet adaptive merge reason", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_analyzer));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -1489,7 +1494,7 @@ int ObAdaptiveMergePolicy::check_inc_sstable_row_cnt_percentage(
|
||||
}
|
||||
|
||||
int ObAdaptiveMergePolicy::check_load_data_situation(
|
||||
const ObTabletStat &tablet_stat,
|
||||
const storage::ObTabletStatAnalyzer &analyzer,
|
||||
const ObTablet &tablet,
|
||||
AdaptiveMergeReason &reason)
|
||||
{
|
||||
@ -1497,19 +1502,20 @@ int ObAdaptiveMergePolicy::check_load_data_situation(
|
||||
const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
|
||||
const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
|
||||
reason = AdaptiveMergeReason::NONE;
|
||||
if (!tablet.is_valid() || !tablet_stat.is_valid()
|
||||
|| ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) {
|
||||
|
||||
if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid()
|
||||
|| ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
|
||||
} else if (tablet_stat.is_hot_tablet() && tablet_stat.is_insert_mostly()) {
|
||||
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(analyzer));
|
||||
} else if (analyzer.is_hot_tablet() && analyzer.is_insert_mostly()) {
|
||||
reason = AdaptiveMergeReason::LOAD_DATA_SCENE;
|
||||
}
|
||||
LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
|
||||
LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer));
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAdaptiveMergePolicy::check_tombstone_situation(
|
||||
const ObTabletStat &tablet_stat,
|
||||
const storage::ObTabletStatAnalyzer &analyzer,
|
||||
const ObTablet &tablet,
|
||||
AdaptiveMergeReason &reason)
|
||||
{
|
||||
@ -1518,19 +1524,19 @@ int ObAdaptiveMergePolicy::check_tombstone_situation(
|
||||
const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
|
||||
reason = AdaptiveMergeReason::NONE;
|
||||
|
||||
if (!tablet.is_valid() || !tablet_stat.is_valid()
|
||||
|| ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) {
|
||||
if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid()
|
||||
|| ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
|
||||
} else if (tablet_stat.is_hot_tablet() && (tablet_stat.is_update_mostly() || tablet_stat.is_delete_mostly())) {
|
||||
LOG_WARN("get invalid arguments", K(ret), K(analyzer), K(tablet));
|
||||
} else if (analyzer.tablet_stat_.merge_cnt_ > 1 && analyzer.is_update_or_delete_mostly()) {
|
||||
reason = AdaptiveMergeReason::TOMBSTONE_SCENE;
|
||||
}
|
||||
LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
|
||||
LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer));
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAdaptiveMergePolicy::check_ineffecient_read(
|
||||
const ObTabletStat &tablet_stat,
|
||||
const storage::ObTabletStatAnalyzer &analyzer,
|
||||
const ObTablet &tablet,
|
||||
AdaptiveMergeReason &reason)
|
||||
{
|
||||
@ -1539,16 +1545,14 @@ int ObAdaptiveMergePolicy::check_ineffecient_read(
|
||||
const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
|
||||
reason = AdaptiveMergeReason::NONE;
|
||||
|
||||
if (!tablet.is_valid() || !tablet_stat.is_valid() ||
|
||||
ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) {
|
||||
if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid()
|
||||
|| ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
|
||||
} else if (!tablet_stat.is_hot_tablet()) {
|
||||
} else if (tablet_stat.is_inefficient_scan() || tablet_stat.is_inefficient_insert()
|
||||
|| tablet_stat.is_inefficient_pushdown()) {
|
||||
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(analyzer));
|
||||
} else if (analyzer.is_hot_tablet() && analyzer.has_slow_query()) {
|
||||
reason = AdaptiveMergeReason::INEFFICIENT_QUERY;
|
||||
}
|
||||
LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
|
||||
LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -28,7 +28,7 @@ class ObTabletTableStore;
|
||||
class ObGetMergeTablesResult;
|
||||
class ObTablesHandleArray;
|
||||
class ObStorageSchema;
|
||||
struct ObTabletStat;
|
||||
struct ObTabletStatAnalyzer;
|
||||
struct ObTableHandleV2;
|
||||
class ObLS;
|
||||
class ObTableStoreIterator;
|
||||
@ -242,13 +242,16 @@ private:
|
||||
storage::ObGetMergeTablesResult &result,
|
||||
const bool update_snapshot_flag);
|
||||
private:
|
||||
static int check_load_data_situation(const storage::ObTabletStat &tablet_stat,
|
||||
static int check_load_data_situation(
|
||||
const storage::ObTabletStatAnalyzer &analyzer,
|
||||
const storage::ObTablet &tablet,
|
||||
AdaptiveMergeReason &merge_reason);
|
||||
static int check_tombstone_situation(const storage::ObTabletStat &tablet_stat,
|
||||
static int check_tombstone_situation(
|
||||
const storage::ObTabletStatAnalyzer &analyzer,
|
||||
const storage::ObTablet &tablet,
|
||||
AdaptiveMergeReason &merge_reason);
|
||||
static int check_ineffecient_read(const storage::ObTabletStat &tablet_stat,
|
||||
static int check_ineffecient_read(
|
||||
const storage::ObTabletStatAnalyzer &analyzer,
|
||||
const storage::ObTablet &tablet,
|
||||
AdaptiveMergeReason &merge_reason);
|
||||
static int check_inc_sstable_row_cnt_percentage(
|
||||
@ -263,7 +266,7 @@ private:
|
||||
static constexpr int64_t LOAD_DATA_SCENE_THRESHOLD = 70;
|
||||
static constexpr int64_t TOMBSTONE_SCENE_THRESHOLD = 50;
|
||||
static constexpr float INC_ROW_COUNT_PERCENTAGE_THRESHOLD = 0.5;
|
||||
static constexpr int64_t TRANS_STATE_DETERM_ROW_CNT_THRESHOLD = 1000L; // 1k
|
||||
static constexpr int64_t TRANS_STATE_DETERM_ROW_CNT_THRESHOLD = 10000L; // 10k
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -38,6 +38,7 @@
|
||||
#include "storage/compaction/ob_tenant_tablet_scheduler.h"
|
||||
#include "share/ob_get_compat_mode.h"
|
||||
#include "share/ob_tablet_meta_table_compaction_operator.h"
|
||||
#include "share/resource_manager/ob_cgroup_ctrl.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
@ -1297,6 +1298,7 @@ int ObTabletMergeFinishTask::try_schedule_compaction_after_mini(
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
const ObTabletID &tablet_id = ctx.param_.tablet_id_;
|
||||
ObLSID ls_id = ctx.param_.ls_id_;
|
||||
|
||||
// report tablet stat
|
||||
if (0 == ctx.get_merge_info().get_sstable_merge_info().macro_block_count_) {
|
||||
// empty mini compaction, no need to reprot stat
|
||||
|
||||
@ -985,6 +985,17 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
||||
DEL_SUSPECT_INFO(MEDIUM_MERGE, ls_id, ObTabletID(INT64_MAX));
|
||||
}
|
||||
|
||||
bool enable_adaptive_compaction = enable_adaptive_compaction_;
|
||||
ObTenantSysStat cur_sys_stat;
|
||||
if (!enable_adaptive_compaction_) {
|
||||
// do nothing
|
||||
} else if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_sys_stat(cur_sys_stat))) {
|
||||
LOG_WARN("failed to get tenant sys stat", K(tmp_ret), K(cur_sys_stat));
|
||||
} else if (cur_sys_stat.is_full_cpu_usage()) {
|
||||
enable_adaptive_compaction = false;
|
||||
FLOG_INFO("disable adaptive compaction due to the high load CPU", K(ret), K(cur_sys_stat));
|
||||
}
|
||||
|
||||
while (OB_SUCC(ret) && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) { // loop all tablet in ls
|
||||
bool tablet_merge_finish = false;
|
||||
if (OB_FAIL(medium_ls_tablet_iter_.get_next_tablet(ls_handle, tablet_handle))) {
|
||||
@ -1034,14 +1045,14 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
|
||||
} else if (ObTimeUtility::fast_current_time() * 1000 <
|
||||
tablet->get_medium_compaction_info_list().get_wait_check_medium_scn() + WAIT_MEDIUM_CHECK_THRESHOLD) {
|
||||
// need wait 10 mins before schedule meta major
|
||||
} else if (enable_adaptive_compaction_ && OB_TMP_FAIL(schedule_tablet_meta_major_merge(ls_handle, new_handle))) {
|
||||
} else if (enable_adaptive_compaction && OB_TMP_FAIL(schedule_tablet_meta_major_merge(ls_handle, new_handle))) {
|
||||
if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) {
|
||||
LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (could_schedule_next_medium && could_major_merge
|
||||
&& (!tablet_merge_finish || enable_adaptive_compaction_ || check_medium_finish)
|
||||
&& (!tablet_merge_finish || enable_adaptive_compaction || check_medium_finish)
|
||||
&& OB_TMP_FAIL(func.schedule_next_medium_for_leader(
|
||||
tablet_merge_finish ? 0 : merge_version, schedule_stats_))) { // schedule another round
|
||||
LOG_WARN("failed to schedule next medium", K(tmp_ret), K(ls_id), K(tablet_id));
|
||||
|
||||
@ -9,6 +9,8 @@
|
||||
#include "share/ob_force_print_log.h"
|
||||
#include "share/ob_thread_mgr.h"
|
||||
#include "storage/ob_tenant_tablet_stat_mgr.h"
|
||||
#include "observer/ob_server_struct.h"
|
||||
#include "observer/ob_server.h"
|
||||
|
||||
using namespace oceanbase;
|
||||
using namespace oceanbase::common;
|
||||
@ -105,17 +107,28 @@ bool ObTabletStat::is_valid() const
|
||||
bool ObTabletStat::check_need_report() const
|
||||
{
|
||||
bool bret = false;
|
||||
ObTabletID tablet_id(tablet_id_);
|
||||
|
||||
if (0 != query_cnt_) { // report by query
|
||||
if (QUERY_REPORT_MIN_ROW_CNT <= scan_physical_row_cnt_ ||
|
||||
QUERY_REPORT_MIN_MICRO_BLOCK_CNT <= scan_micro_block_cnt_ ||
|
||||
QUERY_REPORT_MIN_SCAN_TABLE_CNT <= exist_row_total_table_cnt_) {
|
||||
if (tablet_id.is_ls_inner_tablet()) {
|
||||
// do nothing
|
||||
} else if (0 < merge_cnt_) { // report by compaction
|
||||
bret = get_total_merge_row_count() >= MERGE_REPORT_MIN_ROW_CNT;
|
||||
} else if (0 < query_cnt_) { // only report the slow query
|
||||
const int64_t boost_factor = tablet_id.is_inner_tablet() ? 2 : 1;
|
||||
if (scan_physical_row_cnt_ > 0 &&
|
||||
scan_physical_row_cnt_ >= scan_logical_row_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) {
|
||||
bret = true;
|
||||
}
|
||||
|
||||
if (!bret && scan_micro_block_cnt_ > 0 &&
|
||||
scan_micro_block_cnt_ >= pushdown_micro_block_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) {
|
||||
bret = true;
|
||||
}
|
||||
|
||||
if (!bret && exist_row_total_table_cnt_ > 0 &&
|
||||
exist_row_total_table_cnt_ >= exist_row_read_table_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) {
|
||||
bret = true;
|
||||
}
|
||||
} else if (0 != merge_cnt_) { // report by compaction
|
||||
bret = MERGE_REPORT_MIN_ROW_CNT <= insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_;
|
||||
} else { // invalid tablet stat
|
||||
bret = false;
|
||||
}
|
||||
return bret;
|
||||
}
|
||||
@ -166,74 +179,96 @@ ObTabletStat& ObTabletStat::archive(int64_t factor)
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool ObTabletStat::is_hot_tablet() const
|
||||
|
||||
/************************************* ObTabletStatAnalyzer *************************************/
|
||||
bool ObTabletStatAnalyzer::is_hot_tablet() const
|
||||
{
|
||||
return query_cnt_ + merge_cnt_ >= ACCESS_FREQUENCY;
|
||||
return tablet_stat_.query_cnt_ + tablet_stat_.merge_cnt_ >= ACCESS_FREQUENCY * boost_factor_;
|
||||
}
|
||||
|
||||
bool ObTabletStat::is_insert_mostly() const
|
||||
bool ObTabletStatAnalyzer::is_insert_mostly() const
|
||||
{
|
||||
bool bret = false;
|
||||
uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_;
|
||||
if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) {
|
||||
ObTabletID tablet_id(tablet_stat_.tablet_id_);
|
||||
uint64_t total_row_cnt = tablet_stat_.get_total_merge_row_count();
|
||||
|
||||
if (tablet_id.is_inner_tablet() || tablet_id.is_ls_inner_tablet()) {
|
||||
// do nothing
|
||||
} else if (0 == tablet_stat_.insert_row_cnt_) {
|
||||
// no insert occurs
|
||||
} else if (total_row_cnt < MERGE_BASIC_ROW_CNT * boost_factor_) {
|
||||
// do nothing
|
||||
} else {
|
||||
bret = insert_row_cnt_ * BASE_FACTOR / total_row_cnt >= INSERT_PIVOT_FACTOR;
|
||||
bret = total_row_cnt * LOAD_THRESHOLD <= tablet_stat_.insert_row_cnt_ * BASE_FACTOR;
|
||||
}
|
||||
return bret;
|
||||
}
|
||||
|
||||
bool ObTabletStat::is_update_mostly() const
|
||||
bool ObTabletStatAnalyzer::is_update_or_delete_mostly() const
|
||||
{
|
||||
bool bret = false;
|
||||
uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_;
|
||||
if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) {
|
||||
uint64_t total_row_cnt = tablet_stat_.get_total_merge_row_count();
|
||||
|
||||
if (0 == tablet_stat_.delete_row_cnt_ + tablet_stat_.update_row_cnt_) {
|
||||
// no update && delete occurs
|
||||
} else if (total_row_cnt < MERGE_BASIC_ROW_CNT * boost_factor_) {
|
||||
// do nothing
|
||||
} else {
|
||||
bret = update_row_cnt_ * BASE_FACTOR / total_row_cnt >= UPDATE_PIVOT_FACTOR;
|
||||
bret = total_row_cnt * TOMBSTONE_THRESHOLD * boost_factor_ <= (tablet_stat_.update_row_cnt_ + tablet_stat_.delete_row_cnt_) * BASE_FACTOR;
|
||||
}
|
||||
return bret;
|
||||
}
|
||||
|
||||
bool ObTabletStat::is_delete_mostly() const
|
||||
bool ObTabletStatAnalyzer::has_slow_query() const
|
||||
{
|
||||
bool bret = false;
|
||||
uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_;
|
||||
if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) {
|
||||
// do nothing
|
||||
} else {
|
||||
bret = delete_row_cnt_ * BASE_FACTOR / total_row_cnt >= DELETE_PIVOT_FACTOR;
|
||||
// all tablet query stats are ineffecient, only check the basic threshold
|
||||
if (tablet_stat_.scan_physical_row_cnt_ >= QUERY_BASIC_ROW_CNT * boost_factor_ ||
|
||||
tablet_stat_.scan_micro_block_cnt_ >= QUERY_BASIC_MICRO_BLOCK_CNT * boost_factor_ ||
|
||||
tablet_stat_.exist_row_total_table_cnt_ >= QUERY_BASIC_ITER_TABLE_CNT * boost_factor_) {
|
||||
bret = true;
|
||||
}
|
||||
return bret;
|
||||
}
|
||||
|
||||
|
||||
bool ObTabletStat::is_inefficient_scan() const
|
||||
/************************************* ObTenantSysStat *************************************/
|
||||
// Creates a tenant sys-stat snapshot with every CPU and memory field zeroed,
// i.e. the same state that reset() produces.
ObTenantSysStat::ObTenantSysStat()
  : cpu_usage_percentage_(0.0),
    min_cpu_cnt_(0.0),
    max_cpu_cnt_(0.0),
    memory_hold_(0),
    memory_limit_(0)
{}
|
||||
|
||||
void ObTenantSysStat::reset()
|
||||
{
|
||||
cpu_usage_percentage_ = 0;
|
||||
min_cpu_cnt_ = 0;
|
||||
max_cpu_cnt_ = 0;
|
||||
memory_hold_ = 0;
|
||||
memory_limit_ = 0;
|
||||
}
|
||||
|
||||
bool ObTenantSysStat::is_small_tenant() const
|
||||
{
|
||||
bool bret = false;
|
||||
if (0 == scan_logical_row_cnt_ || scan_logical_row_cnt_ < BASIC_ROW_CNT_THRESHOLD) {
|
||||
} else {
|
||||
bret = scan_physical_row_cnt_ / scan_logical_row_cnt_ >= SCAN_READ_FACTOR;
|
||||
}
|
||||
// 8c16g
|
||||
const int64_t cpu_threshold = 8;
|
||||
// When the tenant memory exceeds 10GB, the meta tenant occupies at least 10% of the memory.
|
||||
const int64_t mem_threshold = (16L << 30) * 9 / 10;
|
||||
bret = max_cpu_cnt_ < cpu_threshold || memory_limit_ < mem_threshold;
|
||||
return bret;
|
||||
}
|
||||
|
||||
bool ObTabletStat::is_inefficient_insert() const
|
||||
bool ObTenantSysStat::is_full_cpu_usage() const
|
||||
{
|
||||
bool bret = false;
|
||||
if (0 == exist_row_total_table_cnt_ || exist_row_total_table_cnt_ < BASIC_TABLE_CNT_THRESHOLD) {
|
||||
if (is_small_tenant()) {
|
||||
bret = max_cpu_cnt_ * 60 <= cpu_usage_percentage_;
|
||||
} else {
|
||||
bret = exist_row_read_table_cnt_ * BASE_FACTOR / exist_row_total_table_cnt_ >= EXIST_READ_FACTOR;
|
||||
}
|
||||
return bret;
|
||||
}
|
||||
|
||||
bool ObTabletStat::is_inefficient_pushdown() const
|
||||
{
|
||||
bool bret = false;
|
||||
if (0 == scan_micro_block_cnt_ || scan_micro_block_cnt_ < BASIC_MICRO_BLOCK_CNT_THRESHOLD) {
|
||||
} else {
|
||||
bret = pushdown_micro_block_cnt_ < scan_micro_block_cnt_ / SCAN_READ_FACTOR;
|
||||
bret = max_cpu_cnt_ * 70 <= cpu_usage_percentage_;
|
||||
}
|
||||
return bret;
|
||||
}
|
||||
@ -648,6 +683,44 @@ int ObTenantTabletStatMgr::get_history_tablet_stats(
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObTenantTabletStatMgr::get_tablet_analyzer(
|
||||
const share::ObLSID &ls_id,
|
||||
const common::ObTabletID &tablet_id,
|
||||
ObTabletStatAnalyzer &analyzer)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObTenantSysStat sys_stat;
|
||||
|
||||
if (OB_FAIL(get_latest_tablet_stat(ls_id, tablet_id, analyzer.tablet_stat_))) {
|
||||
LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id));
|
||||
} else if (OB_FAIL(get_sys_stat(sys_stat))) {
|
||||
LOG_WARN("failed to get sys stat", K(ret));
|
||||
} else {
|
||||
analyzer.is_small_tenant_ = sys_stat.is_small_tenant();
|
||||
analyzer.boost_factor_ = analyzer.is_small_tenant_ ? 2 : 1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Samples the current tenant's CPU and memory figures into `sys_stat`.
//
// Returns OB_NOT_INIT when the manager has not been initialised, otherwise
// the first error reported by the tenant CPU queries. Fields already written
// before a failing step keep whatever value that step left in them.
int ObTenantTabletStatMgr::get_sys_stat(ObTenantSysStat &sys_stat)
{
  int ret = OB_SUCCESS;

  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    LOG_WARN("ObTenantTabletStatMgr not inited", K(ret));
  } else {
    const auto tenant_id = MTL_ID();
    if (OB_FAIL(GCTX.omt_->get_tenant_cpu_usage(tenant_id, sys_stat.cpu_usage_percentage_))) {
      LOG_WARN("failed to get tenant cpu usage", K(ret), K(sys_stat));
    } else if (OB_FAIL(GCTX.omt_->get_tenant_cpu(tenant_id, sys_stat.min_cpu_cnt_, sys_stat.max_cpu_cnt_))) {
      LOG_WARN("failed to get tenant cpu count", K(ret), K(sys_stat));
    } else {
      // The raw usage value is scaled by 100 before it is handed back
      // (presumably a ratio turned into a percentage, as the field name
      // suggests -- confirm against get_tenant_cpu_usage).
      sys_stat.cpu_usage_percentage_ *= 100;
      sys_stat.memory_hold_ = lib::get_tenant_memory_hold(tenant_id);
      sys_stat.memory_limit_ = lib::get_tenant_memory_limit(tenant_id);
    }
  }
  return ret;
}
|
||||
|
||||
int ObTenantTabletStatMgr::update_tablet_stream(const ObTabletStat &report_stat)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -723,7 +796,7 @@ int ObTenantTabletStatMgr::fetch_node(ObTabletStreamNode *&node)
|
||||
|
||||
void ObTenantTabletStatMgr::process_stats()
|
||||
{
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
int ret = OB_SUCCESS;
|
||||
const uint64_t start_idx = report_cursor_;
|
||||
const uint64_t pending_cur = ATOMIC_LOAD(&pending_cursor_);
|
||||
uint64_t end_idx = (pending_cur > start_idx + DEFAULT_MAX_PENDING_CNT)
|
||||
@ -734,10 +807,10 @@ void ObTenantTabletStatMgr::process_stats()
|
||||
} else {
|
||||
for (uint64_t i = start_idx; i < end_idx; ++i) {
|
||||
const ObTabletStat &cur_stat = report_queue_[i % DEFAULT_MAX_PENDING_CNT];
|
||||
if (!cur_stat.is_valid()) {
|
||||
if (OB_UNLIKELY(!cur_stat.is_valid())) {
|
||||
// allow dirty read
|
||||
} else if (OB_TMP_FAIL(update_tablet_stream(cur_stat))) {
|
||||
LOG_WARN_RET(tmp_ret, "failed to update tablet stat", K(tmp_ret), K(cur_stat));
|
||||
} else if (OB_FAIL(update_tablet_stream(cur_stat))) {
|
||||
LOG_WARN_RET(ret, "failed to update tablet stat", K(ret), K(cur_stat));
|
||||
}
|
||||
}
|
||||
report_cursor_ = pending_cur; // only TabletStatUpdater update this value.
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
#include "lib/allocator/page_arena.h"
|
||||
#include "lib/allocator/ob_fifo_allocator.h"
|
||||
#include "lib/lock/ob_bucket_lock.h"
|
||||
#include "lib/lock/ob_tc_rwlock.h"
|
||||
#include "lib/queue/ob_fixed_queue.h"
|
||||
#include "lib/list/ob_dlist.h"
|
||||
|
||||
@ -69,36 +70,18 @@ public:
|
||||
void reset() { MEMSET(this, 0, sizeof(ObTabletStat)); }
|
||||
bool is_valid() const;
|
||||
bool check_need_report() const;
|
||||
int64_t get_total_merge_row_count() const { return insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_; }
|
||||
ObTabletStat& operator=(const ObTabletStat &other);
|
||||
ObTabletStat& operator+=(const ObTabletStat &other);
|
||||
ObTabletStat& archive(int64_t factor);
|
||||
bool is_hot_tablet() const;
|
||||
bool is_insert_mostly() const;
|
||||
bool is_update_mostly() const;
|
||||
bool is_delete_mostly() const;
|
||||
bool is_inefficient_scan() const;
|
||||
bool is_inefficient_insert() const;
|
||||
bool is_inefficient_pushdown() const;
|
||||
TO_STRING_KV(K_(ls_id), K_(tablet_id), K_(query_cnt), K_(merge_cnt), K_(scan_logical_row_cnt),
|
||||
K_(scan_physical_row_cnt), K_(scan_micro_block_cnt), K_(pushdown_micro_block_cnt),
|
||||
K_(exist_row_total_table_cnt), K_(exist_row_read_table_cnt), K_(insert_row_cnt),
|
||||
K_(update_row_cnt), K_(delete_row_cnt));
|
||||
|
||||
public:
|
||||
static constexpr int64_t ACCESS_FREQUENCY = 5;
|
||||
static constexpr int64_t BASE_FACTOR = 10;
|
||||
static constexpr int64_t INSERT_PIVOT_FACTOR = 5;
|
||||
static constexpr int64_t UPDATE_PIVOT_FACTOR = 4;
|
||||
static constexpr int64_t DELETE_PIVOT_FACTOR = 3;
|
||||
static constexpr int64_t SCAN_READ_FACTOR = 2;
|
||||
static constexpr int64_t EXIST_READ_FACTOR = 7;
|
||||
static constexpr int64_t BASIC_TABLE_CNT_THRESHOLD = 5;
|
||||
static constexpr int64_t BASIC_MICRO_BLOCK_CNT_THRESHOLD = 16;
|
||||
static constexpr int64_t BASIC_ROW_CNT_THRESHOLD = 10000; // TODO(@Danling) make it a comfiguration item
|
||||
static constexpr int64_t QUERY_REPORT_MIN_ROW_CNT = 100;
|
||||
static constexpr int64_t QUERY_REPORT_MIN_MICRO_BLOCK_CNT = 10;
|
||||
static constexpr int64_t QUERY_REPORT_MIN_SCAN_TABLE_CNT = 2;
|
||||
static constexpr int64_t MERGE_REPORT_MIN_ROW_CNT = 100;
|
||||
static constexpr int64_t QUERY_REPORT_INEFFICIENT_THRESHOLD = 3;
|
||||
static constexpr int64_t MERGE_REPORT_MIN_ROW_CNT = 1000;
|
||||
public:
|
||||
int64_t ls_id_;
|
||||
uint64_t tablet_id_;
|
||||
@ -116,6 +99,52 @@ public:
|
||||
};
|
||||
|
||||
|
||||
struct ObTabletStatAnalyzer
|
||||
{
|
||||
public:
|
||||
ObTabletStatAnalyzer() = default;
|
||||
~ObTabletStatAnalyzer() = default;
|
||||
bool is_hot_tablet() const;
|
||||
bool is_insert_mostly() const;
|
||||
bool is_update_or_delete_mostly() const;
|
||||
bool has_slow_query() const;
|
||||
TO_STRING_KV(K_(tablet_stat), K_(is_small_tenant), K_(boost_factor));
|
||||
public:
|
||||
static constexpr int64_t ACCESS_FREQUENCY = 5;
|
||||
static constexpr int64_t BASE_FACTOR = 10;
|
||||
static constexpr int64_t LOAD_THRESHOLD = 7;
|
||||
static constexpr int64_t TOMBSTONE_THRESHOLD = 3;
|
||||
static constexpr int64_t QUERY_BASIC_ROW_CNT = 1000;
|
||||
static constexpr int64_t QUERY_BASIC_MICRO_BLOCK_CNT = 10;
|
||||
static constexpr int64_t QUERY_BASIC_ITER_TABLE_CNT = 5;
|
||||
static constexpr int64_t MERGE_BASIC_ROW_CNT = 10000;
|
||||
public:
|
||||
ObTabletStat tablet_stat_;
|
||||
int64_t boost_factor_;
|
||||
bool is_small_tenant_;
|
||||
};
|
||||
|
||||
|
||||
struct ObTenantSysStat
|
||||
{
|
||||
public:
|
||||
ObTenantSysStat();
|
||||
~ObTenantSysStat() = default;
|
||||
void reset();
|
||||
bool is_small_tenant() const;
|
||||
bool is_full_cpu_usage() const;
|
||||
TO_STRING_KV(K_(cpu_usage_percentage), K_(min_cpu_cnt), K_(max_cpu_cnt), K_(memory_hold), K_(memory_limit));
|
||||
|
||||
public:
|
||||
static constexpr double EPS = 1e-9;
|
||||
double cpu_usage_percentage_;
|
||||
double min_cpu_cnt_;
|
||||
double max_cpu_cnt_;
|
||||
int64_t memory_hold_;
|
||||
int64_t memory_limit_;
|
||||
};
|
||||
|
||||
|
||||
template<uint32_t SIZE>
|
||||
class ObTabletStatBucket
|
||||
{
|
||||
@ -302,6 +331,11 @@ public:
|
||||
const share::ObLSID &ls_id,
|
||||
const common::ObTabletID &tablet_id,
|
||||
common::ObIArray<ObTabletStat> &tablet_stats);
|
||||
int get_tablet_analyzer(
|
||||
const share::ObLSID &ls_id,
|
||||
const common::ObTabletID &tablet_id,
|
||||
ObTabletStatAnalyzer &analyzer);
|
||||
int get_sys_stat(ObTenantSysStat &sys_stat);
|
||||
void process_stats();
|
||||
void refresh_all(const int64_t step);
|
||||
private:
|
||||
@ -326,18 +360,18 @@ private:
|
||||
static constexpr int64_t TABLET_STAT_PROCESS_INTERVAL = 5 * 1000L * 1000L; //5s
|
||||
static constexpr int64_t CHECK_INTERVAL = 120L * 1000L * 1000L; //120s
|
||||
static constexpr int64_t CHECK_RUNNING_TIME_INTERVAL = 120L * 1000L * 1000L; //120s
|
||||
static constexpr int64_t DUMP_TABLET_STAT_INTERVAL = 60 * 1000LL * 1000LL; //60s
|
||||
static constexpr int64_t CHECK_SYS_STAT_INTERVAL = 10 * 1000LL * 1000LL; //10s
|
||||
static constexpr int32_t DEFAULT_MAX_FREE_STREAM_CNT = 10000;
|
||||
static constexpr int32_t DEFAULT_UP_LIMIT_STREAM_CNT = 20000;
|
||||
static constexpr int32_t DEFAULT_BUCKET_NUM = 1000;
|
||||
static constexpr int32_t DEFAULT_MAX_PENDING_CNT = 20000;
|
||||
static constexpr int32_t DEFAULT_MAX_PENDING_CNT = 40000;
|
||||
static constexpr int32_t MAX_REPORT_RETRY_CNT = 5;
|
||||
|
||||
TabletStatUpdater report_stat_task_;
|
||||
ObTabletStreamPool stream_pool_;
|
||||
TabletStreamMap stream_map_;
|
||||
common::ObBucketLock bucket_lock_;
|
||||
ObTabletStat report_queue_[DEFAULT_MAX_PENDING_CNT];
|
||||
ObTabletStat report_queue_[DEFAULT_MAX_PENDING_CNT]; // 12 * 8 * 40000 bytes
|
||||
uint64_t report_cursor_;
|
||||
uint64_t pending_cursor_;
|
||||
int report_tg_id_;
|
||||
|
||||
@ -113,7 +113,7 @@ void TestTenantTabletStatMgr::batch_report_stat(int64_t report_num)
|
||||
for (int64_t i = 0; i < report_num; ++i) {
|
||||
ObTabletStat curr_stat;
|
||||
curr_stat.ls_id_ = 1;
|
||||
curr_stat.tablet_id_ = 10001 + i;
|
||||
curr_stat.tablet_id_ = 300001 + i;
|
||||
curr_stat.query_cnt_ = 100 * (i + 1);
|
||||
curr_stat.scan_physical_row_cnt_ = 10000 + i;
|
||||
|
||||
@ -201,10 +201,10 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stream)
|
||||
{
|
||||
ObTabletStat tablet_stat;
|
||||
tablet_stat.ls_id_ = 1;
|
||||
tablet_stat.tablet_id_ = 1;
|
||||
tablet_stat.tablet_id_ = 200123;
|
||||
tablet_stat.query_cnt_ = 100;
|
||||
tablet_stat.scan_logical_row_cnt_ = 100;
|
||||
tablet_stat.scan_physical_row_cnt_ = 100;
|
||||
tablet_stat.scan_logical_row_cnt_ = 1000000;
|
||||
tablet_stat.scan_physical_row_cnt_ = 1000000;
|
||||
|
||||
ObTabletStream stream;
|
||||
auto &curr_buckets = stream.curr_buckets_;
|
||||
@ -380,10 +380,10 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_mgr)
|
||||
|
||||
ObTabletStat tablet_stat;
|
||||
tablet_stat.ls_id_ = 1;
|
||||
tablet_stat.tablet_id_ = 123;
|
||||
tablet_stat.tablet_id_ = 200123;
|
||||
tablet_stat.query_cnt_ = 100;
|
||||
tablet_stat.scan_logical_row_cnt_ = 100;
|
||||
tablet_stat.scan_physical_row_cnt_ = 100;
|
||||
tablet_stat.scan_logical_row_cnt_ = 100000;
|
||||
tablet_stat.scan_physical_row_cnt_ = 1000000;
|
||||
|
||||
bool report_succ = false;
|
||||
ret = stat_mgr_->report_stat(tablet_stat, report_succ);
|
||||
@ -392,7 +392,7 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_mgr)
|
||||
|
||||
ObTabletStat res;
|
||||
share::ObLSID ls_id(1);
|
||||
common::ObTabletID tablet_id(123);
|
||||
common::ObTabletID tablet_id(200123);
|
||||
ret = stat_mgr_->get_latest_tablet_stat(ls_id, tablet_id, res);
|
||||
ASSERT_EQ(OB_SUCCESS, ret);
|
||||
ASSERT_EQ(100, res.query_cnt_);
|
||||
|
||||
Reference in New Issue
Block a user