Enhanced resource isolation capability, previous IOPS group control for front-end and back-end tasks
Co-authored-by: Charles0429 <xiezhenjiang@gmail.com> Co-authored-by: raywill <hustos@gmail.com>
This commit is contained in:
@ -13,6 +13,7 @@
|
||||
#define USING_LOG_PREFIX SHARE
|
||||
#include "ob_resource_plan_manager.h"
|
||||
#include "lib/string/ob_string.h"
|
||||
#include "share/io/ob_io_manager.h"
|
||||
#include "share/resource_manager/ob_resource_manager_proxy.h"
|
||||
#include "share/resource_manager/ob_cgroup_ctrl.h"
|
||||
#include "observer/ob_server_struct.h"
|
||||
@ -24,43 +25,134 @@ using namespace oceanbase::share;
|
||||
int ObResourcePlanManager::init()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
LOG_INFO("resource plan manager init ok");
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::refresh_resource_plan(uint64_t tenant_id, ObString &plan_name)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObResourceManagerProxy proxy;
|
||||
// 目前每个租户最多只有 2 个 活跃 directive : interactive, batch
|
||||
ObPlanDirectiveSet directives;
|
||||
if (OB_ISNULL(GCTX.cgroup_ctrl_)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("cgroup ctrl is null", K(ret));
|
||||
} else if (!GCTX.cgroup_ctrl_->is_valid()) {
|
||||
ret = OB_EAGAIN;
|
||||
// cgroup ctrl 没有初始化成功,可能是没有 cgroup fs、没有权限等原因
|
||||
// 此时不再继续后继资源隔离操作
|
||||
} else if (OB_FAIL(proxy.get_all_plan_directives(tenant_id, plan_name, directives))) {
|
||||
LOG_WARN("fail get plan directive", K(tenant_id), K(plan_name), K(ret));
|
||||
if (tenant_plan_map_.created()) {
|
||||
ret = OB_INIT_TWICE;
|
||||
LOG_WARN("mapping rule manager should not init multiple times", K(ret));
|
||||
} else if (OB_FAIL(tenant_plan_map_.create(7, "TENANT_PLAN_MAP"))) {
|
||||
LOG_WARN("fail create tenant_plan_map", K(ret));
|
||||
} else {
|
||||
// directive => cgroup share/cfs_cpu_quota 转换。2 步:
|
||||
// step1: 以 100 为总值做归一化
|
||||
// step2: 将值转化成 cgroup 值 (utilization=>cfs_cpu_quota 的值和 cpu 核数等有关)
|
||||
// - 如果 utilization = 100,那么 cfs_cpu_quota = -1
|
||||
if (OB_FAIL(create_cgroup_dir_if_not_exist(directives))) {
|
||||
LOG_WARN("fail create cgroup dir", K(directives), K(ret));
|
||||
} else if (OB_FAIL(normalize_directives(directives))) {
|
||||
LOG_WARN("fail normalize directive", K(ret));
|
||||
} else if (OB_FAIL(flush_directive_to_cgroup_fs(directives))) {
|
||||
LOG_WARN("fail flush directive to cgroup fs", K(ret));
|
||||
}
|
||||
LOG_INFO("refresh_resource_plan", K(tenant_id), K(plan_name), K(directives));
|
||||
LOG_INFO("resource plan manager init ok");
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::normalize_directives(ObPlanDirectiveSet &directives)
|
||||
int ObResourcePlanManager::switch_resource_plan(const uint64_t tenant_id, ObString &plan_name)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObResMgrVarcharValue origin_plan;
|
||||
ObResMgrVarcharValue cur_plan(plan_name);
|
||||
if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) {
|
||||
ret = OB_INVALID_TENANT_ID;
|
||||
LOG_WARN("invalid config", K(ret), K(tenant_id));
|
||||
} else if (OB_FAIL(tenant_plan_map_.get_refactored(tenant_id, origin_plan))) {
|
||||
if (OB_HASH_NOT_EXIST == ret) {
|
||||
// initialize
|
||||
if (OB_FAIL(tenant_plan_map_.set_refactored(tenant_id, cur_plan))) {
|
||||
LOG_WARN("set plan failed", K(ret), K(tenant_id));
|
||||
} else {
|
||||
LOG_INFO("add tenant id plan success", K(tenant_id), K(cur_plan));
|
||||
}
|
||||
} else {
|
||||
LOG_WARN("get plan failed", K(ret), K(tenant_id));
|
||||
}
|
||||
} else if (origin_plan != cur_plan) {
|
||||
// switch plan,reset 原来plan下对应directive的io资源
|
||||
ObResourceManagerProxy proxy;
|
||||
ObPlanDirectiveSet directives;
|
||||
if (OB_FAIL(proxy.get_all_plan_directives(tenant_id, origin_plan.get_value(), directives))) {
|
||||
LOG_WARN("fail get plan directive", K(tenant_id), K(origin_plan), K(ret));
|
||||
} else {
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < directives.count(); ++i) {
|
||||
const ObPlanDirective &cur_directive = directives.at(i);
|
||||
if (OB_FAIL(GCTX.cgroup_ctrl_->reset_group_iops(
|
||||
tenant_id,
|
||||
1,
|
||||
cur_directive.group_name_))) {
|
||||
LOG_ERROR("reset old plan group directive failed", K(cur_directive), K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret) && plan_name.empty()) {
|
||||
// reset user and function hashmap
|
||||
if (OB_FAIL(proxy.reset_all_mapping_rules())) {
|
||||
LOG_WARN("fail reset all group rules",K(ret));
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
if (OB_FAIL(tenant_plan_map_.set_refactored(tenant_id, cur_plan, 1))) { //overrite
|
||||
LOG_WARN("set plan failed", K(ret), K(tenant_id));
|
||||
} else {
|
||||
LOG_INFO("switch resource plan success", K(tenant_id), K(origin_plan), K(cur_plan));
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::refresh_resource_plan(const uint64_t tenant_id, ObString &plan_name)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObResourceManagerProxy proxy;
|
||||
ObPlanDirectiveSet directives;
|
||||
ObPlanDirective other_directive; // for OTHER_GROUPS
|
||||
other_directive.set_group_id(0);
|
||||
other_directive.set_tenant_id(tenant_id);
|
||||
// 首先check plan是否发生了切换,如果plan切换那么原plan中资源设置先清零
|
||||
if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) {
|
||||
ret = OB_INVALID_TENANT_ID;
|
||||
LOG_WARN("invalid config", K(ret), K(tenant_id));
|
||||
} else if (OB_FAIL(switch_resource_plan(tenant_id, plan_name))) {
|
||||
LOG_WARN("check resource plan failed", K(ret), K(tenant_id), K(plan_name));
|
||||
} else if (OB_FAIL(proxy.get_all_plan_directives(tenant_id, plan_name, directives))) {
|
||||
LOG_WARN("fail get plan directive", K(tenant_id), K(plan_name), K(ret));
|
||||
} else if (OB_FAIL(normalize_iops_directives(tenant_id, directives, other_directive))) {
|
||||
LOG_WARN("fail normalize directive", K(ret));
|
||||
} else if (OB_FAIL(flush_directive_to_iops_control(tenant_id, directives, other_directive))) { // for IOPS
|
||||
LOG_WARN("fail flush directive to io control", K(ret));
|
||||
} else {
|
||||
if (OB_ISNULL(GCTX.cgroup_ctrl_) || !(GCTX.cgroup_ctrl_->is_valid())) {
|
||||
// do nothing,cgroup ctrl 没有初始化成功,可能是没有 cgroup fs、没有权限等原因
|
||||
// cgroup不生效无法对CPU资源进行隔离,但上述io资源隔离可以继续
|
||||
|
||||
|
||||
// directive => cgroup share/cfs_cpu_quota 转换。2 步:
|
||||
// step1: 以 100 为总值做归一化
|
||||
// step2: 将值转化成 cgroup 值 (utilization=>cfs_cpu_quota 的值和 cpu 核数等有关)
|
||||
// - 如果 utilization = 100,那么 cfs_cpu_quota = -1
|
||||
} else if (OB_FAIL(create_cgroup_dir_if_not_exist(directives))) {
|
||||
LOG_WARN("fail create cgroup dir", K(directives), K(ret));
|
||||
} else if (OB_FAIL(normalize_cpu_directives(directives))) {
|
||||
LOG_WARN("fail normalize directive", K(ret));
|
||||
} else if (OB_FAIL(flush_directive_to_cgroup_fs(directives))) { // for CPU
|
||||
LOG_WARN("fail flush directive to cgroup fs", K(ret));
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
LOG_INFO("refresh resource plan success", K(tenant_id), K(plan_name), K(directives));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::get_cur_plan(const uint64_t tenant_id, ObResMgrVarcharValue &plan_name)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
plan_name.reset();
|
||||
if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) {
|
||||
ret = OB_INVALID_TENANT_ID;
|
||||
LOG_WARN("invalid config", K(ret), K(tenant_id));
|
||||
} else if (OB_FAIL(tenant_plan_map_.get_refactored(tenant_id, plan_name))) {
|
||||
if (OB_HASH_NOT_EXIST == ret) {
|
||||
//plan只有被使用才会放到map里
|
||||
ret = OB_SUCCESS;
|
||||
LOG_INFO("delete plan success with no_releated_io_module", K(plan_name), K(tenant_id));
|
||||
} else {
|
||||
LOG_WARN("get plan failed", K(ret), K(tenant_id), K(plan_name));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::normalize_cpu_directives(ObPlanDirectiveSet &directives)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
int64_t total_mgmt = 0;
|
||||
@ -122,6 +214,54 @@ int ObResourcePlanManager::normalize_directives(ObPlanDirectiveSet &directives)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::normalize_iops_directives(const uint64_t tenant_id,
|
||||
ObPlanDirectiveSet &directives,
|
||||
ObPlanDirective &other_group_directive)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
// 在本版本中,用户无法指定OTHER_GROUPS及其他默认组的资源,OTHER资源是使用其他资源组算出来的
|
||||
// OTHER MIN_IOPS = 100-SUM; MAX_IOPS = 100; WEIGHT_IOPS = 100/SUM;
|
||||
// 需要在产品手册中告知,建议用户不要把所有组的min_iops总和设置成100%
|
||||
|
||||
uint64_t total_weight = 0;
|
||||
uint64_t total_min = 0;
|
||||
if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) {
|
||||
ret = OB_INVALID_TENANT_ID;
|
||||
LOG_WARN("invalid config", K(ret), K(tenant_id));
|
||||
} else {
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < directives.count(); ++i) {
|
||||
ObPlanDirective &cur_directive = directives.at(i);
|
||||
if (cur_directive.group_id_ < GROUP_START_ID) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
// 理论上不应该出现
|
||||
LOG_WARN("unexpected error!!!", K(cur_directive));
|
||||
} else if (OB_UNLIKELY(!cur_directive.is_valid())) {
|
||||
ret = OB_INVALID_CONFIG;
|
||||
LOG_WARN("invalid group io config", K(cur_directive));
|
||||
} else {
|
||||
total_weight += cur_directive.weight_iops_;
|
||||
total_min += cur_directive.min_iops_;
|
||||
}
|
||||
}
|
||||
total_weight += OTHER_GROUPS_IOPS_WEIGHT; //OTHER GROUPS WEIGHT
|
||||
|
||||
if(OB_SUCC(ret)) {
|
||||
if (total_min > 100) {
|
||||
ret = OB_INVALID_CONFIG;
|
||||
LOG_WARN("invalid group io config", K(total_min));
|
||||
} else {
|
||||
for (int64_t i = 0; i < directives.count(); ++i) {
|
||||
ObPlanDirective &cur_directive = directives.at(i);
|
||||
cur_directive.weight_iops_ = 100 * cur_directive.weight_iops_ / total_weight;
|
||||
}
|
||||
other_group_directive.weight_iops_ = 100 * 100 / total_weight;
|
||||
other_group_directive.min_iops_ = 100 - total_min;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::create_cgroup_dir_if_not_exist(const ObPlanDirectiveSet &directives)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -190,10 +330,10 @@ int ObResourcePlanManager::flush_directive_to_cgroup_fs(ObPlanDirectiveSet &dire
|
||||
LOG_ERROR("fail set cpu shares. tenant isolation function may not functional!!",
|
||||
K(d), K(ret));
|
||||
} else if (OB_FAIL(GCTX.cgroup_ctrl_->set_cpu_cfs_quota(
|
||||
d.tenant_id_,
|
||||
d.level_,
|
||||
d.group_name_,
|
||||
static_cast<int32_t>(d.utilization_limit_)))) {
|
||||
d.tenant_id_,
|
||||
d.level_,
|
||||
d.group_name_,
|
||||
static_cast<int32_t>(d.utilization_limit_)))) {
|
||||
LOG_ERROR("fail set cpu quota. tenant isolation function may not functional!!",
|
||||
K(d), K(ret));
|
||||
}
|
||||
@ -201,3 +341,62 @@ int ObResourcePlanManager::flush_directive_to_cgroup_fs(ObPlanDirectiveSet &dire
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::flush_directive_to_iops_control(const uint64_t tenant_id,
|
||||
ObPlanDirectiveSet &directives,
|
||||
ObPlanDirective &other_group_directive)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) {
|
||||
ret = OB_INVALID_TENANT_ID;
|
||||
LOG_WARN("invalid config", K(ret), K(tenant_id));
|
||||
} else {
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < directives.count(); ++i) {
|
||||
const ObPlanDirective &cur_directive = directives.at(i);
|
||||
share::OBGroupIOInfo cur_io_info;
|
||||
if (OB_FAIL(cur_io_info.init(cur_directive.min_iops_, cur_directive.max_iops_, cur_directive.weight_iops_))) {
|
||||
LOG_ERROR("fail init group io info", K(cur_directive), K(ret));
|
||||
} else if (OB_FAIL(GCTX.cgroup_ctrl_->set_group_iops(
|
||||
cur_directive.tenant_id_,
|
||||
cur_directive.level_,
|
||||
cur_directive.group_id_,
|
||||
cur_io_info))) {
|
||||
LOG_ERROR("fail set iops. tenant isolation function may not functional!!",
|
||||
K(cur_directive), K(ret));
|
||||
}
|
||||
// ignore ret, continue
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
share::OBGroupIOInfo other_io_info;
|
||||
if (OB_FAIL(other_io_info.init(other_group_directive.min_iops_,
|
||||
other_group_directive.max_iops_,
|
||||
other_group_directive.weight_iops_))) {
|
||||
LOG_ERROR("fail init other group io info", K(other_group_directive), K(ret));
|
||||
} else if (OB_FAIL(GCTX.cgroup_ctrl_->set_group_iops(
|
||||
other_group_directive.tenant_id_,
|
||||
other_group_directive.level_,
|
||||
other_group_directive.group_id_,
|
||||
other_io_info))) {
|
||||
LOG_ERROR("fail set iops. tenant isolation function may not functional!!",
|
||||
K(other_group_directive), K(ret));
|
||||
} else if (OB_FAIL(refresh_tenant_group_io_config(tenant_id))) {
|
||||
LOG_WARN("refresh tenant io config failed", K(ret), K(tenant_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObResourcePlanManager::refresh_tenant_group_io_config(const uint64_t tenant_id) {
|
||||
int ret = OB_SUCCESS;
|
||||
ObRefHolder<ObTenantIOManager> tenant_holder;
|
||||
if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) {
|
||||
ret = OB_INVALID_TENANT_ID;
|
||||
LOG_WARN("invalid config", K(ret), K(tenant_id));
|
||||
} else if (OB_FAIL(OB_IO_MANAGER.get_tenant_io_manager(tenant_id, tenant_holder))) {
|
||||
LOG_WARN("get tenant io manager failed", K(ret), K(tenant_id));
|
||||
} else if (OB_FAIL(tenant_holder.get_ptr()->refresh_group_io_config())) {
|
||||
LOG_WARN("refresh group io config failed", K(ret), K(tenant_id));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
Reference in New Issue
Block a user