// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "agent/cgroups_mgr.h" #include #include #include #include #include #include #include #include #include #include #include "boost/filesystem.hpp" #include "common/logging.h" #include "olap/data_dir.h" #include "olap/storage_engine.h" #include "runtime/exec_env.h" #include "runtime/load_path_mgr.h" using std::string; using std::map; using std::vector; using std::stringstream; using apache::thrift::TException; using apache::thrift::transport::TTransportException; namespace doris { static CgroupsMgr *s_global_cg_mgr; const std::string CgroupsMgr::_s_system_user = "system"; const std::string CgroupsMgr::_s_system_group = "normal"; std::map CgroupsMgr::_s_resource_cgroups = {{TResourceType::type::TRESOURCE_CPU_SHARE, "cpu.shares"}, {TResourceType::type::TRESOURCE_IO_SHARE, "blkio.weight"}}; CgroupsMgr::CgroupsMgr(ExecEnv* exec_env, const string& root_cgroups_path) : _exec_env(exec_env), _root_cgroups_path(root_cgroups_path), _is_cgroups_init_success(false), _cur_version(-1) { if (s_global_cg_mgr == nullptr) { s_global_cg_mgr = this; } } CgroupsMgr::~CgroupsMgr() { } AgentStatus CgroupsMgr::update_local_cgroups(const TFetchResourceResult& new_fetched_resource) { std::lock_guard lck(_update_cgroups_mtx); if (!_is_cgroups_init_success) { return AgentStatus::DORIS_ERROR; } if (_cur_version >= new_fetched_resource.resourceVersion) { return AgentStatus::DORIS_SUCCESS; } const std::map& new_user_resource = new_fetched_resource.resourceByUser; if (!_local_users.empty()) { std::set::const_iterator old_it = _local_users.begin(); for (; old_it != _local_users.end(); ++old_it) { if (new_user_resource.count(*old_it) == 0) { this->delete_user_cgroups(*old_it); } } } // Clear local users set, because it will be inserted again _local_users.clear(); std::map::const_iterator new_it = new_user_resource.begin(); for (; new_it != new_user_resource.end(); ++new_it) { const string& user_name = new_it->first; const std::map& level_share = new_it->second.shareByGroup; std::map user_share; const std::map& resource_share = new_it->second.resource.resourceByType; std::map::const_iterator resource_it = resource_share.begin(); for (; resource_it != resource_share.end(); ++resource_it) { if (_s_resource_cgroups.count(resource_it->first) > 0) { user_share[_s_resource_cgroups[resource_it->first]] = resource_it->second; } } modify_user_cgroups(user_name, user_share, level_share); _config_user_disk_throttle(user_name, resource_share); // Insert user to local user's set _local_users.insert(user_name); } // Using resource version, not subscribe version _cur_version = new_fetched_resource.resourceVersion; return AgentStatus::DORIS_SUCCESS; } void CgroupsMgr::_config_user_disk_throttle(std::string user_name, const std::map& resource_share) { int64_t hdd_read_iops = _get_resource_value(TResourceType::type::TRESOURCE_HDD_READ_IOPS, resource_share); int64_t hdd_write_iops = _get_resource_value(TResourceType::type::TRESOURCE_HDD_WRITE_IOPS, resource_share); int64_t hdd_read_mbps = _get_resource_value(TResourceType::type::TRESOURCE_HDD_READ_MBPS, resource_share); int64_t hdd_write_mbps = _get_resource_value(TResourceType::type::TRESOURCE_HDD_WRITE_MBPS, resource_share); int64_t ssd_read_iops = _get_resource_value(TResourceType::type::TRESOURCE_SSD_READ_IOPS, resource_share); int64_t ssd_write_iops = _get_resource_value(TResourceType::type::TRESOURCE_SSD_WRITE_IOPS, resource_share); int64_t ssd_read_mbps = _get_resource_value(TResourceType::type::TRESOURCE_SSD_READ_MBPS, resource_share); int64_t ssd_write_mbps = _get_resource_value(TResourceType::type::TRESOURCE_SSD_WRITE_MBPS, resource_share); _config_disk_throttle(user_name, "", hdd_read_iops, hdd_write_iops, hdd_read_mbps, hdd_write_mbps, ssd_read_iops, ssd_write_iops, ssd_read_mbps, ssd_write_mbps); _config_disk_throttle(user_name, "low", hdd_read_iops, hdd_write_iops, hdd_read_mbps, hdd_write_mbps, ssd_read_iops, ssd_write_iops, ssd_read_mbps, ssd_write_mbps); _config_disk_throttle(user_name, "normal", hdd_read_iops, hdd_write_iops, hdd_read_mbps, hdd_write_mbps, ssd_read_iops, ssd_write_iops, ssd_read_mbps, ssd_write_mbps); _config_disk_throttle(user_name, "high", hdd_read_iops, hdd_write_iops, hdd_read_mbps, hdd_write_mbps, ssd_read_iops, ssd_write_iops, ssd_read_mbps, ssd_write_mbps); } int64_t CgroupsMgr::_get_resource_value(const TResourceType::type resource_type, const std::map& resource_share) { int64_t resource_value = -1; std::map::const_iterator it = resource_share.find(resource_type); if (it != resource_share.end()) { resource_value = it->second; } return resource_value; } AgentStatus CgroupsMgr::_config_disk_throttle(std::string user_name, std::string level, int64_t hdd_read_iops, int64_t hdd_write_iops, int64_t hdd_read_mbps, int64_t hdd_write_mbps, int64_t ssd_read_iops, int64_t ssd_write_iops, int64_t ssd_read_mbps, int64_t ssd_write_mbps) { string cgroups_path = this->_root_cgroups_path + "/" + user_name + "/" + level; string read_bps_path = cgroups_path + "/blkio.throttle.read_bps_device"; string write_bps_path = cgroups_path + "/blkio.throttle.write_bps_device"; string read_iops_path = cgroups_path + "/blkio.throttle.read_iops_device"; string write_iops_path = cgroups_path + "/blkio.throttle.write_iops_device"; if (!is_file_exist(cgroups_path.c_str())) { if (!boost::filesystem::create_directory(cgroups_path)) { LOG(ERROR) << "Create cgroups: " << cgroups_path << " failed"; return AgentStatus::DORIS_ERROR; } } // add olap engine data path here auto stores = StorageEngine::instance()->get_stores(); // buld load data path, it is alreay in data path // _exec_env->load_path_mgr()->get_load_data_path(&data_paths); stringstream ctrl_cmd; for (auto store : stores) { // check disk type int64_t read_iops = hdd_read_iops; int64_t write_iops = hdd_write_iops; int64_t read_mbps = hdd_read_mbps; int64_t write_mbps = hdd_write_mbps; // if user set hdd not ssd, then use hdd for ssd if (store->is_ssd_disk()) { read_iops = ssd_read_iops == -1 ? hdd_read_iops : ssd_read_iops; write_iops = ssd_write_iops == -1 ? hdd_write_iops : ssd_write_iops; read_mbps = ssd_read_mbps == -1 ? hdd_read_mbps : ssd_read_mbps; write_mbps = ssd_write_mbps == -1 ? hdd_write_mbps : ssd_write_mbps; } struct stat file_stat; if (stat(store->path().c_str(), &file_stat) != 0) { continue; } int major_number = major(file_stat.st_dev); int minor_number = minor(file_stat.st_dev); minor_number = (minor_number / 16) * 16; if (read_iops != -1) { ctrl_cmd << major_number << ":" << minor_number << " " << read_iops; _echo_cmd_to_cgroup(ctrl_cmd, read_iops_path); ctrl_cmd.clear(); ctrl_cmd.str(std::string()); } if (write_iops != -1) { ctrl_cmd << major_number << ":" << minor_number << " " << write_iops; _echo_cmd_to_cgroup(ctrl_cmd, write_iops_path); ctrl_cmd.clear(); ctrl_cmd.str(std::string()); } if (read_mbps != -1) { ctrl_cmd << major_number << ":" << minor_number << " " << (read_mbps << 20); _echo_cmd_to_cgroup(ctrl_cmd, read_bps_path); ctrl_cmd.clear(); ctrl_cmd.str(std::string()); } if (write_mbps != -1) { ctrl_cmd << major_number << ":" << minor_number << " " << (write_mbps << 20); _echo_cmd_to_cgroup(ctrl_cmd, write_bps_path); ctrl_cmd.clear(); ctrl_cmd.str(std::string()); } } return AgentStatus::DORIS_SUCCESS; } AgentStatus CgroupsMgr::modify_user_cgroups(const string& user_name, const map& user_share, const map& level_share) { // Check if the user's cgroups exists, if not create it string user_cgroups_path = this->_root_cgroups_path + "/" + user_name; if (!is_file_exist(user_cgroups_path.c_str())) { if (!boost::filesystem::create_directory(user_cgroups_path)) { LOG(ERROR) << "Create cgroups for user " << user_name << " failed"; return AgentStatus::DORIS_ERROR; } } // Traverse the user resource share map to append share value to cgroup's file for (map::const_iterator user_resource = user_share.begin(); user_resource != user_share.end(); ++user_resource){ string resource_file_name = user_resource->first; int32_t user_share_weight = user_resource->second; // Append the share_weight value to the file string user_resource_path = user_cgroups_path + "/" + resource_file_name; std::ofstream user_cgroups(user_resource_path.c_str(), std::ios::out | std::ios::app); if (!user_cgroups.is_open()) { return AgentStatus::DORIS_ERROR; } user_cgroups << user_share_weight << std::endl; user_cgroups.close(); LOG(INFO) << "Append " << user_share_weight << " to " << user_resource_path; for (map::const_iterator level_resource = level_share.begin(); level_resource != level_share.end(); ++level_resource){ // Append resource share to level shares string level_name = level_resource->first; int32_t level_share_weight = level_resource->second; // Check if the level cgroups exist string level_cgroups_path = user_cgroups_path + "/" + level_name; if (!is_file_exist(level_cgroups_path.c_str())) { if (!boost::filesystem::create_directory(level_cgroups_path)) { return AgentStatus::DORIS_ERROR; } } // Append the share_weight value to the file string level_resource_path = level_cgroups_path + "/" + resource_file_name; std::ofstream level_cgroups(level_resource_path.c_str(), std::ios::out | std::ios::app); if (!level_cgroups.is_open()) { return AgentStatus::DORIS_ERROR; } level_cgroups << level_share_weight << std::endl; level_cgroups.close(); LOG(INFO) << "Append " << level_share_weight << " to " << level_resource_path; } } return AgentStatus::DORIS_SUCCESS; } AgentStatus CgroupsMgr::init_cgroups() { std::string root_cgroups_tasks_path = this->_root_cgroups_path + "/tasks"; // Check if the root cgroups exists if (is_directory(this->_root_cgroups_path.c_str()) && is_file_exist(root_cgroups_tasks_path.c_str())) { // Check the folder's virtual filesystem to find whether it is a cgroup file system #ifndef BE_TEST struct statfs fs_type; statfs(root_cgroups_tasks_path.c_str(), &fs_type); if (fs_type.f_type != CGROUP_SUPER_MAGIC) { LOG(ERROR) << _root_cgroups_path << " is not a cgroups file system."; _is_cgroups_init_success = false; return AgentStatus::DORIS_ERROR; } #endif // Check if current user have write permission to cgroup folder if (access(_root_cgroups_path.c_str(), W_OK) != 0) { LOG(ERROR) << "Doris does not have write permission to " << _root_cgroups_path; _is_cgroups_init_success = false; return AgentStatus::DORIS_ERROR; } // If root folder exists, then delete all subfolders under it boost::filesystem::directory_iterator item_begin(this->_root_cgroups_path); boost::filesystem::directory_iterator item_end; for (; item_begin != item_end; item_begin++) { if (is_directory(item_begin->path().string().c_str())) { // Delete the sub folder if (delete_user_cgroups(item_begin->path().filename().string()) != AgentStatus::DORIS_SUCCESS) { LOG(ERROR) << "Could not clean subfolder " << item_begin->path().string(); _is_cgroups_init_success = false; return AgentStatus::DORIS_ERROR; } } } LOG(INFO) << "Initialize doris cgroups successfully under folder " << _root_cgroups_path; _is_cgroups_init_success = true; return AgentStatus::DORIS_SUCCESS; } else { LOG(ERROR) << "Could not find a valid cgroups path for resource isolation," << "current value is " << _root_cgroups_path; _is_cgroups_init_success = false; return AgentStatus::DORIS_ERROR; } } #define gettid() syscall(__NR_gettid) void CgroupsMgr::apply_cgroup(const string& user_name, const string& level) { if (s_global_cg_mgr == nullptr) { return; } s_global_cg_mgr->assign_to_cgroups(user_name, level); } AgentStatus CgroupsMgr::assign_to_cgroups(const string& user_name, const string& level) { if (!_is_cgroups_init_success) { return AgentStatus::DORIS_ERROR; } int64_t tid = gettid(); return assign_thread_to_cgroups(tid, user_name, level); } AgentStatus CgroupsMgr::assign_thread_to_cgroups(int64_t thread_id, const string& user_name, const string& level) { if (!_is_cgroups_init_success) { return AgentStatus::DORIS_ERROR; } string tasks_path = _root_cgroups_path + "/" + user_name + "/" + level + "/tasks"; if (!is_file_exist(_root_cgroups_path + "/" + user_name)) { tasks_path = this->_root_cgroups_path + "/" + _default_user_name + "/" + _default_level + "/tasks"; } else if (!is_file_exist(_root_cgroups_path + "/" + user_name + "/" + level)) { tasks_path = this->_root_cgroups_path + "/" + user_name + "/tasks"; } if (!is_file_exist(tasks_path.c_str())) { LOG(ERROR) << "Cgroups path " << tasks_path << " not exist!"; return AgentStatus::DORIS_ERROR; } std::ofstream tasks(tasks_path.c_str(), std::ios::out | std::ios::app); if (!tasks.is_open()) { // This means doris could not open this file. May be it does not have access to it LOG(ERROR) << "Echo thread: " << thread_id << " to " << tasks_path << " failed!"; return AgentStatus::DORIS_ERROR; } // Append thread id to the tasks file directly tasks << thread_id << std::endl; tasks.close(); return AgentStatus::DORIS_SUCCESS; } AgentStatus CgroupsMgr::delete_user_cgroups(const string& user_name) { string user_cgroups_path = this->_root_cgroups_path + "/" + user_name; if (is_file_exist(user_cgroups_path.c_str())) { // Delete sub cgroups --> level cgroups boost::filesystem::directory_iterator item_begin(user_cgroups_path); boost::filesystem::directory_iterator item_end; for (; item_begin != item_end; item_begin++) { if (is_directory(item_begin->path().string().c_str())) { string cur_cgroups_path = item_begin->path().string(); if (this->drop_cgroups(cur_cgroups_path) < 0) { return AgentStatus::DORIS_ERROR; } } } // Delete user cgroups if (this->drop_cgroups(user_cgroups_path) < 0) { return AgentStatus::DORIS_ERROR; } } return AgentStatus::DORIS_SUCCESS; } AgentStatus CgroupsMgr::drop_cgroups(const string& deleted_cgroups_path) { // Try to delete the cgroups folder // If failed then there maybe exist active tasks under it and try to relocate them // Currently, try 10 times to relocate and delete the cgroups. int32_t i = 0; while (is_file_exist(deleted_cgroups_path) && rmdir(deleted_cgroups_path.c_str()) < 0 && i < this->_drop_retry_times) { this->relocate_tasks(deleted_cgroups_path, this->_root_cgroups_path); ++i; #ifdef BE_TEST boost::filesystem::remove_all(deleted_cgroups_path); #endif if (i == this->_drop_retry_times){ LOG(ERROR) << "drop cgroups under path: " << deleted_cgroups_path << " failed."; return AgentStatus::DORIS_ERROR; } } return AgentStatus::DORIS_SUCCESS; } AgentStatus CgroupsMgr::relocate_tasks(const string& src_cgroups, const string& dest_cgroups) { string src_tasks_path = src_cgroups + "/tasks"; string dest_tasks_path = dest_cgroups + "/tasks"; std::ifstream src_tasks(src_tasks_path.c_str()); if (!src_tasks) { return AgentStatus::DORIS_ERROR; } std::ofstream dest_tasks(dest_tasks_path.c_str(), std::ios::out | std::ios::app); if (!dest_tasks) { return AgentStatus::DORIS_ERROR; } int64_t taskid; while (src_tasks >> taskid) { dest_tasks << taskid << std::endl; // If the thread id or process id not exists, then error occurs in the stream. // Clear the error state for every append. dest_tasks.clear(); } src_tasks.close(); dest_tasks.close(); return AgentStatus::DORIS_SUCCESS; } void CgroupsMgr::_echo_cmd_to_cgroup(stringstream& ctrl_cmd, string& cgroups_path) { std::ofstream cgroups_stream(cgroups_path.c_str(), std::ios::out | std::ios::app); if (cgroups_stream.is_open()) { cgroups_stream << ctrl_cmd.str() << std::endl; cgroups_stream.close(); } } bool CgroupsMgr::is_directory(const char* file_path) { struct stat file_stat; if (stat(file_path, &file_stat) != 0) { return false; } if (S_ISDIR(file_stat.st_mode)) { return true; } else { return false; } } bool CgroupsMgr::is_file_exist(const char* file_path) { struct stat file_stat; if (stat(file_path, &file_stat) != 0) { return false; } return true; } bool CgroupsMgr::is_file_exist(const std::string& file_path) { return is_file_exist(file_path.c_str()); } } // namespace doris