disable adaptive compaction when detecting high CPU load

This commit is contained in:
Fengjingkun
2023-06-16 09:42:05 +00:00
committed by ob-robot
parent 31f066d0a6
commit 12e2837207
7 changed files with 252 additions and 125 deletions

View File

@ -1428,32 +1428,37 @@ int ObAdaptiveMergePolicy::get_adaptive_merge_reason(
int tmp_ret = OB_SUCCESS;
const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
ObTabletStat tablet_stat;
reason = AdaptiveMergeReason::NONE;
if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_latest_tablet_stat(ls_id, tablet_id, tablet_stat))) {
reason = AdaptiveMergeReason::NONE;
ObTabletStatAnalyzer tablet_analyzer;
if (tablet_id.is_special_merge_tablet()) {
// do nothing
} else if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_tablet_analyzer(ls_id, tablet_id, tablet_analyzer))) {
if (OB_HASH_NOT_EXIST != ret) {
LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id));
LOG_WARN("failed to get tablet analyzer stat", K(ret), K(ls_id), K(tablet_id));
} else if (OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) {
LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id));
} else {
ret = OB_SUCCESS;
}
} else {
if (OB_TMP_FAIL(check_tombstone_situation(tablet_stat, tablet, reason))) {
LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id));
if (OB_TMP_FAIL(check_tombstone_situation(tablet_analyzer, tablet, reason))) {
LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer));
}
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_stat, tablet, reason))) {
LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id));
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_analyzer, tablet, reason))) {
LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer));
}
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) {
LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id));
LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer));
}
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_analyzer, tablet, reason))) {
LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer));
}
if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_stat, tablet, reason))) {
LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id));
}
if (REACH_TENANT_TIME_INTERVAL(10 * 1000 * 1000 /*10s*/)) {
LOG_INFO("Check tablet adaptive merge reason", K(reason), K(tablet_stat)); // TODO tmp log, remove later
}
LOG_INFO("Check tablet adaptive merge reason", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_analyzer));
}
return ret;
}
@ -1489,7 +1494,7 @@ int ObAdaptiveMergePolicy::check_inc_sstable_row_cnt_percentage(
}
int ObAdaptiveMergePolicy::check_load_data_situation(
const ObTabletStat &tablet_stat,
const storage::ObTabletStatAnalyzer &analyzer,
const ObTablet &tablet,
AdaptiveMergeReason &reason)
{
@ -1497,19 +1502,20 @@ int ObAdaptiveMergePolicy::check_load_data_situation(
const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_;
const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
reason = AdaptiveMergeReason::NONE;
if (!tablet.is_valid() || !tablet_stat.is_valid()
|| ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) {
if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid()
|| ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
} else if (tablet_stat.is_hot_tablet() && tablet_stat.is_insert_mostly()) {
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(analyzer));
} else if (analyzer.is_hot_tablet() && analyzer.is_insert_mostly()) {
reason = AdaptiveMergeReason::LOAD_DATA_SCENE;
}
LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer));
return ret;
}
int ObAdaptiveMergePolicy::check_tombstone_situation(
const ObTabletStat &tablet_stat,
const storage::ObTabletStatAnalyzer &analyzer,
const ObTablet &tablet,
AdaptiveMergeReason &reason)
{
@ -1518,19 +1524,19 @@ int ObAdaptiveMergePolicy::check_tombstone_situation(
const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
reason = AdaptiveMergeReason::NONE;
if (!tablet.is_valid() || !tablet_stat.is_valid()
|| ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) {
if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid()
|| ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
} else if (tablet_stat.is_hot_tablet() && (tablet_stat.is_update_mostly() || tablet_stat.is_delete_mostly())) {
LOG_WARN("get invalid arguments", K(ret), K(analyzer), K(tablet));
} else if (analyzer.tablet_stat_.merge_cnt_ > 1 && analyzer.is_update_or_delete_mostly()) {
reason = AdaptiveMergeReason::TOMBSTONE_SCENE;
}
LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer));
return ret;
}
int ObAdaptiveMergePolicy::check_ineffecient_read(
const ObTabletStat &tablet_stat,
const storage::ObTabletStatAnalyzer &analyzer,
const ObTablet &tablet,
AdaptiveMergeReason &reason)
{
@ -1539,16 +1545,14 @@ int ObAdaptiveMergePolicy::check_ineffecient_read(
const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_;
reason = AdaptiveMergeReason::NONE;
if (!tablet.is_valid() || !tablet_stat.is_valid() ||
ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) {
if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid()
|| ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat));
} else if (!tablet_stat.is_hot_tablet()) {
} else if (tablet_stat.is_inefficient_scan() || tablet_stat.is_inefficient_insert()
|| tablet_stat.is_inefficient_pushdown()) {
LOG_WARN("get invalid arguments", K(ret), K(tablet), K(analyzer));
} else if (analyzer.is_hot_tablet() && analyzer.has_slow_query()) {
reason = AdaptiveMergeReason::INEFFICIENT_QUERY;
}
LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat));
LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer));
return ret;
}

View File

@ -28,7 +28,7 @@ class ObTabletTableStore;
class ObGetMergeTablesResult;
class ObTablesHandleArray;
class ObStorageSchema;
struct ObTabletStat;
struct ObTabletStatAnalyzer;
struct ObTableHandleV2;
class ObLS;
class ObTableStoreIterator;
@ -242,13 +242,16 @@ private:
storage::ObGetMergeTablesResult &result,
const bool update_snapshot_flag);
private:
static int check_load_data_situation(const storage::ObTabletStat &tablet_stat,
static int check_load_data_situation(
const storage::ObTabletStatAnalyzer &analyzer,
const storage::ObTablet &tablet,
AdaptiveMergeReason &merge_reason);
static int check_tombstone_situation(const storage::ObTabletStat &tablet_stat,
static int check_tombstone_situation(
const storage::ObTabletStatAnalyzer &analyzer,
const storage::ObTablet &tablet,
AdaptiveMergeReason &merge_reason);
static int check_ineffecient_read(const storage::ObTabletStat &tablet_stat,
static int check_ineffecient_read(
const storage::ObTabletStatAnalyzer &analyzer,
const storage::ObTablet &tablet,
AdaptiveMergeReason &merge_reason);
static int check_inc_sstable_row_cnt_percentage(
@ -263,7 +266,7 @@ private:
static constexpr int64_t LOAD_DATA_SCENE_THRESHOLD = 70;
static constexpr int64_t TOMBSTONE_SCENE_THRESHOLD = 50;
static constexpr float INC_ROW_COUNT_PERCENTAGE_THRESHOLD = 0.5;
static constexpr int64_t TRANS_STATE_DETERM_ROW_CNT_THRESHOLD = 1000L; // 1k
static constexpr int64_t TRANS_STATE_DETERM_ROW_CNT_THRESHOLD = 10000L; // 10k
};

View File

@ -38,6 +38,7 @@
#include "storage/compaction/ob_tenant_tablet_scheduler.h"
#include "share/ob_get_compat_mode.h"
#include "share/ob_tablet_meta_table_compaction_operator.h"
#include "share/resource_manager/ob_cgroup_ctrl.h"
namespace oceanbase
{
@ -1297,6 +1298,7 @@ int ObTabletMergeFinishTask::try_schedule_compaction_after_mini(
int tmp_ret = OB_SUCCESS;
const ObTabletID &tablet_id = ctx.param_.tablet_id_;
ObLSID ls_id = ctx.param_.ls_id_;
// report tablet stat
if (0 == ctx.get_merge_info().get_sstable_merge_info().macro_block_count_) {
// empty mini compaction, no need to report stat

View File

@ -985,6 +985,17 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
DEL_SUSPECT_INFO(MEDIUM_MERGE, ls_id, ObTabletID(INT64_MAX));
}
bool enable_adaptive_compaction = enable_adaptive_compaction_;
ObTenantSysStat cur_sys_stat;
if (!enable_adaptive_compaction_) {
// do nothing
} else if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_sys_stat(cur_sys_stat))) {
LOG_WARN("failed to get tenant sys stat", K(tmp_ret), K(cur_sys_stat));
} else if (cur_sys_stat.is_full_cpu_usage()) {
enable_adaptive_compaction = false;
FLOG_INFO("disable adaptive compaction due to the high load CPU", K(ret), K(cur_sys_stat));
}
while (OB_SUCC(ret) && schedule_tablet_cnt < SCHEDULE_TABLET_BATCH_CNT) { // loop all tablet in ls
bool tablet_merge_finish = false;
if (OB_FAIL(medium_ls_tablet_iter_.get_next_tablet(ls_handle, tablet_handle))) {
@ -1034,14 +1045,14 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge(
} else if (ObTimeUtility::fast_current_time() * 1000 <
tablet->get_medium_compaction_info_list().get_wait_check_medium_scn() + WAIT_MEDIUM_CHECK_THRESHOLD) {
// need wait 10 mins before schedule meta major
} else if (enable_adaptive_compaction_ && OB_TMP_FAIL(schedule_tablet_meta_major_merge(ls_handle, new_handle))) {
} else if (enable_adaptive_compaction && OB_TMP_FAIL(schedule_tablet_meta_major_merge(ls_handle, new_handle))) {
if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) {
LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_id));
}
}
}
if (could_schedule_next_medium && could_major_merge
&& (!tablet_merge_finish || enable_adaptive_compaction_ || check_medium_finish)
&& (!tablet_merge_finish || enable_adaptive_compaction || check_medium_finish)
&& OB_TMP_FAIL(func.schedule_next_medium_for_leader(
tablet_merge_finish ? 0 : merge_version, schedule_stats_))) { // schedule another round
LOG_WARN("failed to schedule next medium", K(tmp_ret), K(ls_id), K(tablet_id));

View File

@ -9,6 +9,8 @@
#include "share/ob_force_print_log.h"
#include "share/ob_thread_mgr.h"
#include "storage/ob_tenant_tablet_stat_mgr.h"
#include "observer/ob_server_struct.h"
#include "observer/ob_server.h"
using namespace oceanbase;
using namespace oceanbase::common;
@ -105,17 +107,28 @@ bool ObTabletStat::is_valid() const
bool ObTabletStat::check_need_report() const
{
bool bret = false;
ObTabletID tablet_id(tablet_id_);
if (0 != query_cnt_) { // report by query
if (QUERY_REPORT_MIN_ROW_CNT <= scan_physical_row_cnt_ ||
QUERY_REPORT_MIN_MICRO_BLOCK_CNT <= scan_micro_block_cnt_ ||
QUERY_REPORT_MIN_SCAN_TABLE_CNT <= exist_row_total_table_cnt_) {
if (tablet_id.is_ls_inner_tablet()) {
// do nothing
} else if (0 < merge_cnt_) { // report by compaction
bret = get_total_merge_row_count() >= MERGE_REPORT_MIN_ROW_CNT;
} else if (0 < query_cnt_) { // only report the slow query
const int64_t boost_factor = tablet_id.is_inner_tablet() ? 2 : 1;
if (scan_physical_row_cnt_ > 0 &&
scan_physical_row_cnt_ >= scan_logical_row_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) {
bret = true;
}
if (!bret && scan_micro_block_cnt_ > 0 &&
scan_micro_block_cnt_ >= pushdown_micro_block_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) {
bret = true;
}
if (!bret && exist_row_total_table_cnt_ > 0 &&
exist_row_total_table_cnt_ >= exist_row_read_table_cnt_ * QUERY_REPORT_INEFFICIENT_THRESHOLD * boost_factor) {
bret = true;
}
} else if (0 != merge_cnt_) { // report by compaction
bret = MERGE_REPORT_MIN_ROW_CNT <= insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_;
} else { // invalid tablet stat
bret = false;
}
return bret;
}
@ -166,74 +179,96 @@ ObTabletStat& ObTabletStat::archive(int64_t factor)
return *this;
}
bool ObTabletStat::is_hot_tablet() const
/************************************* ObTabletStatAnalyzer *************************************/
bool ObTabletStatAnalyzer::is_hot_tablet() const
{
return query_cnt_ + merge_cnt_ >= ACCESS_FREQUENCY;
return tablet_stat_.query_cnt_ + tablet_stat_.merge_cnt_ >= ACCESS_FREQUENCY * boost_factor_;
}
bool ObTabletStat::is_insert_mostly() const
bool ObTabletStatAnalyzer::is_insert_mostly() const
{
bool bret = false;
uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_;
if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) {
ObTabletID tablet_id(tablet_stat_.tablet_id_);
uint64_t total_row_cnt = tablet_stat_.get_total_merge_row_count();
if (tablet_id.is_inner_tablet() || tablet_id.is_ls_inner_tablet()) {
// do nothing
} else if (0 == tablet_stat_.insert_row_cnt_) {
// no insert occurs
} else if (total_row_cnt < MERGE_BASIC_ROW_CNT * boost_factor_) {
// do nothing
} else {
bret = insert_row_cnt_ * BASE_FACTOR / total_row_cnt >= INSERT_PIVOT_FACTOR;
bret = total_row_cnt * LOAD_THRESHOLD <= tablet_stat_.insert_row_cnt_ * BASE_FACTOR;
}
return bret;
}
bool ObTabletStat::is_update_mostly() const
bool ObTabletStatAnalyzer::is_update_or_delete_mostly() const
{
bool bret = false;
uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_;
if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) {
uint64_t total_row_cnt = tablet_stat_.get_total_merge_row_count();
if (0 == tablet_stat_.delete_row_cnt_ + tablet_stat_.update_row_cnt_) {
// no update && delete occurs
} else if (total_row_cnt < MERGE_BASIC_ROW_CNT * boost_factor_) {
// do nothing
} else {
bret = update_row_cnt_ * BASE_FACTOR / total_row_cnt >= UPDATE_PIVOT_FACTOR;
bret = total_row_cnt * TOMBSTONE_THRESHOLD * boost_factor_ <= (tablet_stat_.update_row_cnt_ + tablet_stat_.delete_row_cnt_) * BASE_FACTOR;
}
return bret;
}
bool ObTabletStat::is_delete_mostly() const
bool ObTabletStatAnalyzer::has_slow_query() const
{
bool bret = false;
uint64_t total_row_cnt = insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_;
if (total_row_cnt < BASIC_ROW_CNT_THRESHOLD) {
// do nothing
} else {
bret = delete_row_cnt_ * BASE_FACTOR / total_row_cnt >= DELETE_PIVOT_FACTOR;
// all tablet query stats are inefficient, only check the basic threshold
if (tablet_stat_.scan_physical_row_cnt_ >= QUERY_BASIC_ROW_CNT * boost_factor_ ||
tablet_stat_.scan_micro_block_cnt_ >= QUERY_BASIC_MICRO_BLOCK_CNT * boost_factor_ ||
tablet_stat_.exist_row_total_table_cnt_ >= QUERY_BASIC_ITER_TABLE_CNT * boost_factor_) {
bret = true;
}
return bret;
}
bool ObTabletStat::is_inefficient_scan() const
/************************************* ObTenantSysStat *************************************/
ObTenantSysStat::ObTenantSysStat()
: cpu_usage_percentage_(0),
min_cpu_cnt_(0),
max_cpu_cnt_(0),
memory_hold_(0),
memory_limit_(0)
{
}
// Returns the snapshot to its freshly-constructed state (all fields zero).
void ObTenantSysStat::reset()
{
  // CPU figures are doubles, memory figures are 64-bit integers.
  cpu_usage_percentage_ = min_cpu_cnt_ = max_cpu_cnt_ = 0.0;
  memory_hold_ = memory_limit_ = 0;
}
bool ObTenantSysStat::is_small_tenant() const
{
bool bret = false;
if (0 == scan_logical_row_cnt_ || scan_logical_row_cnt_ < BASIC_ROW_CNT_THRESHOLD) {
} else {
bret = scan_physical_row_cnt_ / scan_logical_row_cnt_ >= SCAN_READ_FACTOR;
}
// 8c16g
const int64_t cpu_threshold = 8;
// When the tenant memory exceeds 10GB, the meta tenant occupies at least 10% of the memory.
const int64_t mem_threshold = (16L << 30) * 9 / 10;
bret = max_cpu_cnt_ < cpu_threshold || memory_limit_ < mem_threshold;
return bret;
}
bool ObTabletStat::is_inefficient_insert() const
bool ObTenantSysStat::is_full_cpu_usage() const
{
bool bret = false;
if (0 == exist_row_total_table_cnt_ || exist_row_total_table_cnt_ < BASIC_TABLE_CNT_THRESHOLD) {
if (is_small_tenant()) {
bret = max_cpu_cnt_ * 60 <= cpu_usage_percentage_;
} else {
bret = exist_row_read_table_cnt_ * BASE_FACTOR / exist_row_total_table_cnt_ >= EXIST_READ_FACTOR;
}
return bret;
}
bool ObTabletStat::is_inefficient_pushdown() const
{
bool bret = false;
if (0 == scan_micro_block_cnt_ || scan_micro_block_cnt_ < BASIC_MICRO_BLOCK_CNT_THRESHOLD) {
} else {
bret = pushdown_micro_block_cnt_ < scan_micro_block_cnt_ / SCAN_READ_FACTOR;
bret = max_cpu_cnt_ * 70 <= cpu_usage_percentage_;
}
return bret;
}
@ -648,6 +683,44 @@ int ObTenantTabletStatMgr::get_history_tablet_stats(
return ret;
}
// Fills `analyzer` for one tablet: first the latest tablet stat, then the
// tenant-size flag and boost factor derived from the current tenant sys stat.
// On any failure the error code is returned and the tenant-derived fields of
// `analyzer` are left unassigned by this function.
int ObTenantTabletStatMgr::get_tablet_analyzer(
    const share::ObLSID &ls_id,
    const common::ObTabletID &tablet_id,
    ObTabletStatAnalyzer &analyzer)
{
  int ret = OB_SUCCESS;

  if (OB_FAIL(get_latest_tablet_stat(ls_id, tablet_id, analyzer.tablet_stat_))) {
    LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id));
  } else {
    ObTenantSysStat tenant_stat;
    if (OB_FAIL(get_sys_stat(tenant_stat))) {
      LOG_WARN("failed to get sys stat", K(ret));
    } else {
      // small tenants get their thresholds doubled
      const bool small_tenant = tenant_stat.is_small_tenant();
      analyzer.is_small_tenant_ = small_tenant;
      analyzer.boost_factor_ = small_tenant ? 2 : 1;
    }
  }
  return ret;
}
// Collects the current tenant's CPU and memory figures into `sys_stat`.
//
// @param sys_stat  output; cpu usage, min/max cpu count, memory hold and
//                  memory limit of the tenant identified by MTL_ID().
// @return OB_SUCCESS on success;
//         OB_NOT_INIT if this manager has not been initialized;
//         OB_ERR_UNEXPECTED if the global multi-tenant handle is null;
//         otherwise the error returned by the cpu usage / cpu count lookup.
//
// On failure `sys_stat` may be partially filled; in particular the cpu usage
// is only scaled by 100 once every lookup has succeeded.
int ObTenantTabletStatMgr::get_sys_stat(ObTenantSysStat &sys_stat)
{
  int ret = OB_SUCCESS;

  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    LOG_WARN("ObTenantTabletStatMgr not inited", K(ret));
  } else if (OB_ISNULL(GCTX.omt_)) {
    // guard the dereferences below instead of crashing on a null handle
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("multi tenant handle is unexpected null", K(ret));
  } else if (OB_FAIL(GCTX.omt_->get_tenant_cpu_usage(MTL_ID(), sys_stat.cpu_usage_percentage_))) {
    LOG_WARN("failed to get tenant cpu usage", K(ret), K(sys_stat));
  } else if (OB_FAIL(GCTX.omt_->get_tenant_cpu(MTL_ID(), sys_stat.min_cpu_cnt_, sys_stat.max_cpu_cnt_))) {
    LOG_WARN("failed to get tenant cpu count", K(ret), K(sys_stat));
  } else {
    sys_stat.memory_hold_ = lib::get_tenant_memory_hold(MTL_ID());
    sys_stat.memory_limit_ = lib::get_tenant_memory_limit(MTL_ID());
    // scale the fetched usage by 100; is_full_cpu_usage() compares it
    // against max_cpu_cnt_ * 60 / max_cpu_cnt_ * 70
    sys_stat.cpu_usage_percentage_ *= 100;
  }
  return ret;
}
int ObTenantTabletStatMgr::update_tablet_stream(const ObTabletStat &report_stat)
{
int ret = OB_SUCCESS;
@ -723,7 +796,7 @@ int ObTenantTabletStatMgr::fetch_node(ObTabletStreamNode *&node)
void ObTenantTabletStatMgr::process_stats()
{
int tmp_ret = OB_SUCCESS;
int ret = OB_SUCCESS;
const uint64_t start_idx = report_cursor_;
const uint64_t pending_cur = ATOMIC_LOAD(&pending_cursor_);
uint64_t end_idx = (pending_cur > start_idx + DEFAULT_MAX_PENDING_CNT)
@ -734,10 +807,10 @@ void ObTenantTabletStatMgr::process_stats()
} else {
for (uint64_t i = start_idx; i < end_idx; ++i) {
const ObTabletStat &cur_stat = report_queue_[i % DEFAULT_MAX_PENDING_CNT];
if (!cur_stat.is_valid()) {
if (OB_UNLIKELY(!cur_stat.is_valid())) {
// allow dirty read
} else if (OB_TMP_FAIL(update_tablet_stream(cur_stat))) {
LOG_WARN_RET(tmp_ret, "failed to update tablet stat", K(tmp_ret), K(cur_stat));
} else if (OB_FAIL(update_tablet_stream(cur_stat))) {
LOG_WARN_RET(ret, "failed to update tablet stat", K(ret), K(cur_stat));
}
}
report_cursor_ = pending_cur; // only TabletStatUpdater update this value.

View File

@ -14,6 +14,7 @@
#include "lib/allocator/page_arena.h"
#include "lib/allocator/ob_fifo_allocator.h"
#include "lib/lock/ob_bucket_lock.h"
#include "lib/lock/ob_tc_rwlock.h"
#include "lib/queue/ob_fixed_queue.h"
#include "lib/list/ob_dlist.h"
@ -69,36 +70,18 @@ public:
void reset() { MEMSET(this, 0, sizeof(ObTabletStat)); }
bool is_valid() const;
bool check_need_report() const;
int64_t get_total_merge_row_count() const { return insert_row_cnt_ + update_row_cnt_ + delete_row_cnt_; }
ObTabletStat& operator=(const ObTabletStat &other);
ObTabletStat& operator+=(const ObTabletStat &other);
ObTabletStat& archive(int64_t factor);
bool is_hot_tablet() const;
bool is_insert_mostly() const;
bool is_update_mostly() const;
bool is_delete_mostly() const;
bool is_inefficient_scan() const;
bool is_inefficient_insert() const;
bool is_inefficient_pushdown() const;
TO_STRING_KV(K_(ls_id), K_(tablet_id), K_(query_cnt), K_(merge_cnt), K_(scan_logical_row_cnt),
K_(scan_physical_row_cnt), K_(scan_micro_block_cnt), K_(pushdown_micro_block_cnt),
K_(exist_row_total_table_cnt), K_(exist_row_read_table_cnt), K_(insert_row_cnt),
K_(update_row_cnt), K_(delete_row_cnt));
public:
static constexpr int64_t ACCESS_FREQUENCY = 5;
static constexpr int64_t BASE_FACTOR = 10;
static constexpr int64_t INSERT_PIVOT_FACTOR = 5;
static constexpr int64_t UPDATE_PIVOT_FACTOR = 4;
static constexpr int64_t DELETE_PIVOT_FACTOR = 3;
static constexpr int64_t SCAN_READ_FACTOR = 2;
static constexpr int64_t EXIST_READ_FACTOR = 7;
static constexpr int64_t BASIC_TABLE_CNT_THRESHOLD = 5;
static constexpr int64_t BASIC_MICRO_BLOCK_CNT_THRESHOLD = 16;
static constexpr int64_t BASIC_ROW_CNT_THRESHOLD = 10000; // TODO(@Danling) make it a comfiguration item
static constexpr int64_t QUERY_REPORT_MIN_ROW_CNT = 100;
static constexpr int64_t QUERY_REPORT_MIN_MICRO_BLOCK_CNT = 10;
static constexpr int64_t QUERY_REPORT_MIN_SCAN_TABLE_CNT = 2;
static constexpr int64_t MERGE_REPORT_MIN_ROW_CNT = 100;
static constexpr int64_t QUERY_REPORT_INEFFICIENT_THRESHOLD = 3;
static constexpr int64_t MERGE_REPORT_MIN_ROW_CNT = 1000;
public:
int64_t ls_id_;
uint64_t tablet_id_;
@ -116,6 +99,52 @@ public:
};
// Wraps a tablet's latest ObTabletStat together with tenant-size information
// and answers the "is this tablet hot / insert-heavy / tombstone-heavy /
// slow to query" questions used by the adaptive merge policy.
struct ObTabletStatAnalyzer
{
public:
  ObTabletStatAnalyzer() = default;
  ~ObTabletStatAnalyzer() = default;
  // query_cnt_ + merge_cnt_ reaches ACCESS_FREQUENCY * boost_factor_
  bool is_hot_tablet() const;
  // enough merged rows, and inserts make up at least LOAD_THRESHOLD / BASE_FACTOR of them
  bool is_insert_mostly() const;
  // enough merged rows, and updates + deletes reach the tombstone ratio
  bool is_update_or_delete_mostly() const;
  // any of the scanned row / micro block / iterated table counters reaches its basic threshold
  bool has_slow_query() const;
  TO_STRING_KV(K_(tablet_stat), K_(is_small_tenant), K_(boost_factor));
public:
  static constexpr int64_t ACCESS_FREQUENCY = 5;
  static constexpr int64_t BASE_FACTOR = 10;          // denominator for the ratio thresholds below
  static constexpr int64_t LOAD_THRESHOLD = 7;        // insert ratio, in units of 1 / BASE_FACTOR
  static constexpr int64_t TOMBSTONE_THRESHOLD = 3;   // update + delete ratio, in units of 1 / BASE_FACTOR
  static constexpr int64_t QUERY_BASIC_ROW_CNT = 1000;
  static constexpr int64_t QUERY_BASIC_MICRO_BLOCK_CNT = 10;
  static constexpr int64_t QUERY_BASIC_ITER_TABLE_CNT = 5;
  static constexpr int64_t MERGE_BASIC_ROW_CNT = 10000;
public:
  ObTabletStat tablet_stat_;
  // Multiplier applied to the thresholds above. get_tablet_analyzer() sets it
  // to 2 for a small tenant and 1 otherwise; it defaults to 1 so that a
  // default-constructed analyzer never exposes an indeterminate value.
  int64_t boost_factor_ = 1;
  // Result of ObTenantSysStat::is_small_tenant(); defaults to false for the
  // same reason.
  bool is_small_tenant_ = false;
};
// Snapshot of a tenant's CPU and memory figures, filled by
// ObTenantTabletStatMgr::get_sys_stat().
struct ObTenantSysStat
{
public:
// zero-initializes every field
ObTenantSysStat();
~ObTenantSysStat() = default;
// sets every field back to zero
void reset();
// true when max_cpu_cnt_ is below 8 or memory_limit_ is below 90% of 16GB
bool is_small_tenant() const;
// true when cpu_usage_percentage_ reaches max_cpu_cnt_ * 60 (small tenant)
// or max_cpu_cnt_ * 70 (otherwise)
bool is_full_cpu_usage() const;
TO_STRING_KV(K_(cpu_usage_percentage), K_(min_cpu_cnt), K_(max_cpu_cnt), K_(memory_hold), K_(memory_limit));
public:
// NOTE(review): not referenced by the member functions visible in this change - confirm it is still needed
static constexpr double EPS = 1e-9;
// tenant cpu usage as fetched by get_sys_stat(), scaled by 100 on success
double cpu_usage_percentage_;
double min_cpu_cnt_;
double max_cpu_cnt_;
// taken from lib::get_tenant_memory_hold() / lib::get_tenant_memory_limit()
int64_t memory_hold_;
int64_t memory_limit_;
};
template<uint32_t SIZE>
class ObTabletStatBucket
{
@ -302,6 +331,11 @@ public:
const share::ObLSID &ls_id,
const common::ObTabletID &tablet_id,
common::ObIArray<ObTabletStat> &tablet_stats);
int get_tablet_analyzer(
const share::ObLSID &ls_id,
const common::ObTabletID &tablet_id,
ObTabletStatAnalyzer &analyzer);
int get_sys_stat(ObTenantSysStat &sys_stat);
void process_stats();
void refresh_all(const int64_t step);
private:
@ -326,18 +360,18 @@ private:
static constexpr int64_t TABLET_STAT_PROCESS_INTERVAL = 5 * 1000L * 1000L; //5s
static constexpr int64_t CHECK_INTERVAL = 120L * 1000L * 1000L; //120s
static constexpr int64_t CHECK_RUNNING_TIME_INTERVAL = 120L * 1000L * 1000L; //120s
static constexpr int64_t DUMP_TABLET_STAT_INTERVAL = 60 * 1000LL * 1000LL; //60s
static constexpr int64_t CHECK_SYS_STAT_INTERVAL = 10 * 1000LL * 1000LL; //10s
static constexpr int32_t DEFAULT_MAX_FREE_STREAM_CNT = 10000;
static constexpr int32_t DEFAULT_UP_LIMIT_STREAM_CNT = 20000;
static constexpr int32_t DEFAULT_BUCKET_NUM = 1000;
static constexpr int32_t DEFAULT_MAX_PENDING_CNT = 20000;
static constexpr int32_t DEFAULT_MAX_PENDING_CNT = 40000;
static constexpr int32_t MAX_REPORT_RETRY_CNT = 5;
TabletStatUpdater report_stat_task_;
ObTabletStreamPool stream_pool_;
TabletStreamMap stream_map_;
common::ObBucketLock bucket_lock_;
ObTabletStat report_queue_[DEFAULT_MAX_PENDING_CNT];
ObTabletStat report_queue_[DEFAULT_MAX_PENDING_CNT]; // 12 * 8 * 40000 bytes
uint64_t report_cursor_;
uint64_t pending_cursor_;
int report_tg_id_;

View File

@ -113,7 +113,7 @@ void TestTenantTabletStatMgr::batch_report_stat(int64_t report_num)
for (int64_t i = 0; i < report_num; ++i) {
ObTabletStat curr_stat;
curr_stat.ls_id_ = 1;
curr_stat.tablet_id_ = 10001 + i;
curr_stat.tablet_id_ = 300001 + i;
curr_stat.query_cnt_ = 100 * (i + 1);
curr_stat.scan_physical_row_cnt_ = 10000 + i;
@ -201,10 +201,10 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stream)
{
ObTabletStat tablet_stat;
tablet_stat.ls_id_ = 1;
tablet_stat.tablet_id_ = 1;
tablet_stat.tablet_id_ = 200123;
tablet_stat.query_cnt_ = 100;
tablet_stat.scan_logical_row_cnt_ = 100;
tablet_stat.scan_physical_row_cnt_ = 100;
tablet_stat.scan_logical_row_cnt_ = 1000000;
tablet_stat.scan_physical_row_cnt_ = 1000000;
ObTabletStream stream;
auto &curr_buckets = stream.curr_buckets_;
@ -380,10 +380,10 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_mgr)
ObTabletStat tablet_stat;
tablet_stat.ls_id_ = 1;
tablet_stat.tablet_id_ = 123;
tablet_stat.tablet_id_ = 200123;
tablet_stat.query_cnt_ = 100;
tablet_stat.scan_logical_row_cnt_ = 100;
tablet_stat.scan_physical_row_cnt_ = 100;
tablet_stat.scan_logical_row_cnt_ = 100000;
tablet_stat.scan_physical_row_cnt_ = 1000000;
bool report_succ = false;
ret = stat_mgr_->report_stat(tablet_stat, report_succ);
@ -392,7 +392,7 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_mgr)
ObTabletStat res;
share::ObLSID ls_id(1);
common::ObTabletID tablet_id(123);
common::ObTabletID tablet_id(200123);
ret = stat_mgr_->get_latest_tablet_stat(ls_id, tablet_id, res);
ASSERT_EQ(OB_SUCCESS, ret);
ASSERT_EQ(100, res.query_cnt_);