From 9496d4c6fbdc43249e7c37f71dcb36e3693e4d45 Mon Sep 17 00:00:00 2001 From: zhjc1124 Date: Thu, 2 Jan 2025 09:45:04 +0000 Subject: [PATCH] fix background cpu set --- src/observer/omt/ob_tenant.cpp | 5 +- src/share/io/ob_io_define.h | 1 - src/share/resource_manager/ob_cgroup_ctrl.cpp | 57 +++++++++---------- src/share/resource_manager/ob_cgroup_ctrl.h | 2 + .../ob_resource_plan_manager.cpp | 50 ++++++++-------- 5 files changed, 59 insertions(+), 56 deletions(-) diff --git a/src/observer/omt/ob_tenant.cpp b/src/observer/omt/ob_tenant.cpp index 7a0ab9f2c..7d62b7874 100644 --- a/src/observer/omt/ob_tenant.cpp +++ b/src/observer/omt/ob_tenant.cpp @@ -1185,9 +1185,10 @@ void ObTenant::set_unit_max_cpu(double cpu) { int tmp_ret = OB_SUCCESS; unit_max_cpu_ = cpu; - if (!cgroup_ctrl_.is_valid() || is_meta_tenant(id_)) { + if (!cgroup_ctrl_.is_valid() || is_sys_tenant(id_) || is_meta_tenant(id_)) { // do nothing - } else if (OB_TMP_FAIL(cgroup_ctrl_.set_cpu_cfs_quota(id_, is_sys_tenant(id_) ? -1 : cpu))) { + // meta tenant and sys tenant are unlimited + } else if (OB_TMP_FAIL(cgroup_ctrl_.set_cpu_cfs_quota(id_, cpu))) { _LOG_WARN_RET(tmp_ret, "set tenant cpu cfs quota failed, tenant_id=%lu, cpu=%.2f", id_, cpu); } } diff --git a/src/share/io/ob_io_define.h b/src/share/io/ob_io_define.h index bc828a2b3..8f6384d0f 100644 --- a/src/share/io/ob_io_define.h +++ b/src/share/io/ob_io_define.h @@ -102,7 +102,6 @@ enum ObIOModule { }; const int64_t SYS_MODULE_CNT = SYS_MODULE_END_ID - SYS_MODULE_START_ID; -static constexpr char BACKGROUND_CGROUP[] = "background"; const char *get_io_sys_group_name(ObIOModule module); struct ObIOFlag final diff --git a/src/share/resource_manager/ob_cgroup_ctrl.cpp b/src/share/resource_manager/ob_cgroup_ctrl.cpp index 4f23e3d06..1a754bed9 100644 --- a/src/share/resource_manager/ob_cgroup_ctrl.cpp +++ b/src/share/resource_manager/ob_cgroup_ctrl.cpp @@ -405,6 +405,9 @@ int ObCgroupCtrl::get_group_path( if (!is_valid()) { ret = OB_INVALID_CONFIG; } else if (!is_valid_tenant_id(tenant_id)) { + // if tenant_id is invalid, return "root_cgroup_path/[background_path]" + // if tenant_id is invalid, group_id should be invalid. + group_id = OB_INVALID_GROUP_ID; // gen root_cgroup_path if (is_background) { // background base, return "cgroup/background" @@ -420,12 +423,7 @@ int ObCgroupCtrl::get_group_path( snprintf(root_cgroup_path, path_bufsize, "%s", OBSERVER_ROOT_CGROUP_DIR); // gen tenant_path - if (!is_valid_tenant_id(tenant_id)) { - // do nothing - // if tenant_id is invalid, return "root_cgroup_path/[base_path]" - // if tenant_id is invalid, group_id should be invalid. - group_id = OB_INVALID_GROUP_ID; - } else if (is_meta_tenant(tenant_id)) { + if (is_meta_tenant(tenant_id)) { // tenant is meta tenant snprintf(user_tenant_path, PATH_BUFSIZE, "tenant_%04lu", gen_user_tenant_id(tenant_id)); snprintf(meta_tenant_path, PATH_BUFSIZE, "tenant_%04lu", tenant_id); @@ -451,7 +449,6 @@ int ObCgroupCtrl::get_group_path( if (REACH_TIME_INTERVAL(WARN_LOG_INTERVAL)) { LOG_WARN("fail to get group_name", K(tmp_ret), K(tenant_id), K(group_id), K(lbt())); } - ret = OB_SUCCESS; // ignore error } else { group_name = g_name.get_value().ptr(); } @@ -522,7 +519,7 @@ int ObCgroupCtrl::add_thread_to_cgroup_( } else if (OB_FAIL(set_cgroup_config_(group_path, TASKS_FILE, tid_value))) { LOG_WARN("add tid to cgroup failed", K(ret), K(group_path), K(tid_value), K(tenant_id)); } else { - LOG_DEBUG("add tid to cgroup success", K(group_path), K(tid_value), K(tenant_id), K(group_id)); + LOG_INFO("add tid to cgroup success", K(group_path), K(tid_value), K(tenant_id), K(group_id)); } } return ret; @@ -664,28 +661,30 @@ int ObCgroupCtrl::set_cpu_cfs_quota_( double target_cpu = cpu; double base_cpu = -1; - // background quota limit - if (is_valid_tenant_id(tenant_id) && is_background) { - int compare_ret = 0; - if (OB_FAIL(get_cpu_cfs_quota(OB_INVALID_TENANT_ID, base_cpu, OB_INVALID_GROUP_ID, is_background))) { - LOG_WARN("get background cpu cfs quota failed", K(ret), K(tenant_id)); - } else if (OB_FAIL(compare_cpu(target_cpu, base_cpu, compare_ret))) { - LOG_WARN("compare cpu failed", K(ret), K(target_cpu), K(base_cpu)); - } else if (compare_ret > 0) { - target_cpu = base_cpu; + if (-1 != target_cpu) { + // background quota limit + if (is_valid_tenant_id(tenant_id) && is_background) { + int compare_ret = 0; + if (OB_FAIL(get_cpu_cfs_quota(OB_INVALID_TENANT_ID, base_cpu, OB_INVALID_GROUP_ID, is_background))) { + LOG_WARN("get background cpu cfs quota failed", K(ret), K(tenant_id)); + } else if (OB_FAIL(compare_cpu(target_cpu, base_cpu, compare_ret))) { + LOG_WARN("compare cpu failed", K(ret), K(target_cpu), K(base_cpu)); + } else if (compare_ret > 0) { + target_cpu = base_cpu; + } } - } - // tenant quota limit - double tenant_cpu = -1; - if (OB_SUCC(ret) && is_valid_group(group_id)) { - int compare_ret = 0; - if (OB_FAIL(get_cpu_cfs_quota(tenant_id, tenant_cpu, OB_INVALID_GROUP_ID, is_background))) { - LOG_WARN("get tenant cpu cfs quota failed", K(ret), K(tenant_id)); - } else if (OB_FAIL(compare_cpu(target_cpu, tenant_cpu, compare_ret))) { - LOG_WARN("compare cpu failed", K(ret), K(target_cpu), K(tenant_cpu)); - } else if (compare_ret > 0) { - target_cpu = tenant_cpu; + // tenant quota limit + double tenant_cpu = -1; + if (OB_SUCC(ret) && is_valid_group(group_id)) { + int compare_ret = 0; + if (OB_FAIL(get_cpu_cfs_quota(tenant_id, tenant_cpu, OB_INVALID_GROUP_ID, is_background))) { + LOG_WARN("get tenant cpu cfs quota failed", K(ret), K(tenant_id)); + } else if (OB_FAIL(compare_cpu(target_cpu, tenant_cpu, compare_ret))) { + LOG_WARN("compare cpu failed", K(ret), K(target_cpu), K(tenant_cpu)); + } else if (compare_ret > 0) { + target_cpu = tenant_cpu; + } } } @@ -742,7 +741,7 @@ int ObCgroupCtrl::recursion_dec_cpu_cfs_quota_(const char *group_path, const dou if (OB_FAIL(ObCgroupCtrl::get_cpu_cfs_quota_by_path_(curr_path, current_cpu))) { LOG_WARN("get cpu cfs quota failed", K(ret), K(curr_path)); } else if ((!is_top_dir && -1 == current_cpu) || - (OB_SUCC(ObCgroupCtrl::compare_cpu(cpu_, current_cpu, compare_ret)) && compare_ret >= 0)) { + (OB_SUCC(ObCgroupCtrl::compare_cpu(cpu_, current_cpu, compare_ret)) && compare_ret > 0)) { // do nothing } else if (OB_FAIL(ObCgroupCtrl::set_cpu_cfs_quota_by_path_(curr_path, cpu_))) { LOG_WARN("set cpu cfs quota failed", K(curr_path), K(cpu_)); diff --git a/src/share/resource_manager/ob_cgroup_ctrl.h b/src/share/resource_manager/ob_cgroup_ctrl.h index 0d640e07e..08192d8e7 100644 --- a/src/share/resource_manager/ob_cgroup_ctrl.h +++ b/src/share/resource_manager/ob_cgroup_ctrl.h @@ -30,6 +30,7 @@ namespace share { class ObGroupName; class ObTenantBase; +class ObResourcePlanManager; typedef enum : uint64_t { DEFAULT = 0, @@ -207,6 +208,7 @@ private: private: friend class oceanbase::omt::ObTenant; friend class oceanbase::share::ObTenantBase; + friend class oceanbase::share::ObResourcePlanManager; friend int oceanbase::lib::SET_GROUP_ID(uint64_t group_id, bool is_background); int add_self_to_cgroup_(const uint64_t tenant_id, const uint64_t group_id = OBCG_DEFAULT, const bool is_background = false); int add_thread_to_cgroup_(const int64_t tid,const uint64_t tenant_id, const uint64_t group_id = OBCG_DEFAULT, const bool is_background = false); diff --git a/src/share/resource_manager/ob_resource_plan_manager.cpp b/src/share/resource_manager/ob_resource_plan_manager.cpp index 463b3393d..485296637 100644 --- a/src/share/resource_manager/ob_resource_plan_manager.cpp +++ b/src/share/resource_manager/ob_resource_plan_manager.cpp @@ -96,39 +96,41 @@ int ObResourcePlanManager::refresh_global_background_cpu() LOG_WARN("fail to set background cpu cfs quota", K(ret)); } else { if (compare_ret < 0) { + const int64_t phy_cpu_cnt = sysconf(_SC_NPROCESSORS_ONLN); int tmp_ret = OB_SUCCESS; omt::TenantIdList ids; GCTX.omt_->get_tenant_ids(ids); for (uint64_t i = 0; i < ids.size(); i++) { uint64_t tenant_id = ids[i]; - double target_cpu = -1; - if (!is_virtual_tenant_id(tenant_id)) { - MTL_SWITCH(tenant_id) - { - target_cpu = MTL_CTX()->unit_max_cpu(); + if (is_sys_tenant(tenant_id) || is_meta_tenant(tenant_id)) { + // do nothing + // meta tenant and sys tenant are unlimited + } else { + double target_cpu = -1; + if (OB_DTL_TENANT_ID == tenant_id) { + target_cpu = (phy_cpu_cnt <= 4) ? 1.0 : OB_DTL_CPU; + } else if (OB_DATA_TENANT_ID == tenant_id) { + target_cpu = (phy_cpu_cnt <= 4) ? 1.0 : OB_DATA_CPU; + } else if (!is_virtual_tenant_id(tenant_id)) { + MTL_SWITCH(tenant_id) + { + target_cpu = MTL_CTX()->unit_max_cpu(); + } + } + if (OB_TMP_FAIL(GCTX.cgroup_ctrl_->compare_cpu(target_cpu, cpu, compare_ret))) { + LOG_WARN_RET(tmp_ret, "compare tenant cpu failed", K(tmp_ret), K(tenant_id)); + } else if (compare_ret > 0) { + target_cpu = cpu; } - } - if (OB_TMP_FAIL(GCTX.cgroup_ctrl_->compare_cpu(target_cpu, cpu, compare_ret))) { - LOG_WARN_RET(tmp_ret, "compare tenant cpu failed", K(tmp_ret), K(tenant_id)); - } else if (compare_ret > 0) { - target_cpu = cpu; - } - if (OB_TMP_FAIL(GCTX.cgroup_ctrl_->set_cpu_cfs_quota( - tenant_id, target_cpu, OB_INVALID_GROUP_ID, true /* is_background */))) { - LOG_WARN_RET(tmp_ret, "set tenant cpu cfs quota failed", K(tmp_ret), K(tenant_id)); - } else if (OB_TMP_FAIL(GCTX.cgroup_ctrl_->set_cpu_cfs_quota( - tenant_id, target_cpu, USER_RESOURCE_OTHER_GROUP_ID, true /* is_background */))) { - LOG_WARN_RET(tmp_ret, "set tenant cpu cfs quota failed", K(ret), K(tenant_id)); - } else if (is_user_tenant(tenant_id)) { - uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id); if (OB_TMP_FAIL(GCTX.cgroup_ctrl_->set_cpu_cfs_quota( - meta_tenant_id, target_cpu, OB_INVALID_GROUP_ID, true /* is_background */))) { - LOG_WARN_RET(tmp_ret, "set tenant cpu cfs quota failed", K(tmp_ret), K(meta_tenant_id)); + tenant_id, target_cpu, OB_INVALID_GROUP_ID, true /* is_background */))) { + LOG_WARN_RET(tmp_ret, "set tenant cpu cfs quota failed", K(tmp_ret), K(tenant_id)); } } } } - + } + if (OB_SUCC(ret) && 0 != compare_ret) { background_quota_ = cpu; } } @@ -343,7 +345,7 @@ int ObResourcePlanManager::flush_directive_to_cgroup_fs(ObPlanDirectiveSet &dire double tenant_cpu_quota = 0; if (OB_FAIL(GCTX.cgroup_ctrl_->get_cpu_cfs_quota(d.tenant_id_, tenant_cpu_quota, OB_INVALID_GROUP_ID))) { LOG_WARN("fail get cpu quota", K(d), K(ret)); - } else if (OB_FAIL(GCTX.cgroup_ctrl_->set_cpu_cfs_quota(d.tenant_id_, + } else if (OB_FAIL(GCTX.cgroup_ctrl_->set_cpu_cfs_quota_(d.tenant_id_, -1 == tenant_cpu_quota ? -1 : tenant_cpu_quota * d.utilization_limit_ / 100, d.group_id_))) { LOG_ERROR( @@ -353,7 +355,7 @@ int ObResourcePlanManager::flush_directive_to_cgroup_fs(ObPlanDirectiveSet &dire if (OB_FAIL(GCTX.cgroup_ctrl_->get_cpu_cfs_quota( d.tenant_id_, tenant_cpu_quota, OB_INVALID_GROUP_ID, true /* is_background */))) { LOG_WARN("fail get cpu quota", K(d), K(ret)); - } else if (OB_FAIL(GCTX.cgroup_ctrl_->set_cpu_cfs_quota(d.tenant_id_, + } else if (OB_FAIL(GCTX.cgroup_ctrl_->set_cpu_cfs_quota_(d.tenant_id_, -1 == tenant_cpu_quota ? -1 : tenant_cpu_quota * d.utilization_limit_ / 100, d.group_id_, true /* is_background */))) {