[feature-wip](resource-group) Support memory soft isolation of resource group (#19802)

create resource group name properties(
    'enable_memory_overcommit' = 'true' // whether to enable memory soft isolation
)
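The commit splits resource groups into two GC paths based on this property. Below is a minimal standalone C++ sketch (not Doris code; `Group` and the names are invented for illustration) of how the flag partitions groups: groups without overcommit are hard-isolated and reclaimed whenever they exceed their own limit, groups with it are soft-isolated and only reclaimed when the whole process runs short of memory.

// Standalone illustration only; Group is an invented stand-in, not a Doris type.
#include <iostream>
#include <string>
#include <vector>

struct Group {
    std::string name;
    bool enable_memory_overcommit;
};

int main() {
    std::vector<Group> groups = {{"etl", false}, {"adhoc", true}};
    std::vector<Group> hard_isolated; // GC'd whenever they exceed their own limit
    std::vector<Group> soft_isolated; // GC'd only when the whole process needs memory
    for (const auto& g : groups) {
        (g.enable_memory_overcommit ? soft_isolated : hard_isolated).push_back(g);
    }
    std::cout << hard_isolated.front().name << " -> hard limit\n"; // etl
    std::cout << soft_isolated.front().name << " -> soft limit\n"; // adhoc
}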
@@ -230,7 +230,8 @@ void Daemon::memory_gc_thread() {
         auto sys_mem_available = doris::MemInfo::sys_mem_available();
         auto proc_mem_no_allocator_cache = doris::MemInfo::proc_mem_no_allocator_cache();
-        auto tg_free_mem = taskgroup::TaskGroupManager::instance()->memory_limit_gc();
+        // GC excess memory for resource groups that not enable overcommit
+        auto tg_free_mem = doris::MemInfo::tg_hard_memory_limit_gc();
         sys_mem_available += tg_free_mem;
         proc_mem_no_allocator_cache -= tg_free_mem;
@@ -487,21 +487,13 @@ int64_t MemTrackerLimiter::free_top_overcommit_query(
 }
 
 int64_t MemTrackerLimiter::tg_memory_limit_gc(
-        uint64_t id, const std::string& name, int64_t memory_limit,
+        int64_t need_free_mem, int64_t used_memory, uint64_t id, const std::string& name,
+        int64_t memory_limit,
         std::vector<taskgroup::TgTrackerLimiterGroup>& tracker_limiter_groups) {
-    int64_t used_memory = 0;
-    for (auto& mem_tracker_group : tracker_limiter_groups) {
-        std::lock_guard<std::mutex> l(mem_tracker_group.group_lock);
-        for (const auto& tracker : mem_tracker_group.trackers) {
-            used_memory += tracker->consumption();
-        }
-    }
-
-    if (used_memory <= memory_limit) {
+    if (need_free_mem <= 0) {
         return 0;
     }
 
-    int64_t need_free_mem = used_memory - memory_limit;
     int64_t freed_mem = 0;
     constexpr auto query_type = MemTrackerLimiter::Type::QUERY;
     auto cancel_str = [id, &name, memory_limit, used_memory](int64_t mem_consumption,
@@ -200,7 +200,8 @@ public:
     }
 
     static int64_t tg_memory_limit_gc(
-            uint64_t id, const std::string& name, int64_t memory_limit,
+            int64_t request_free_memory, int64_t used_memory, uint64_t id, const std::string& name,
+            int64_t memory_limit,
             std::vector<taskgroup::TgTrackerLimiterGroup>& tracker_limiter_groups);
 
     // only for Type::QUERY or Type::LOAD.
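The signature change above moves the measurement out of the GC routine: callers now pass in both the group's current consumption and how much must be freed, and the routine only cancels queries until that quota is reached. A hedged sketch of the resulting calling convention (`gc_group` is an invented stand-in, not the real function):

#include <cstdint>
#include <iostream>

// Stand-in for a tg_memory_limit_gc-style routine: frees up to need_free_mem.
int64_t gc_group(int64_t need_free_mem, int64_t used_memory) {
    if (need_free_mem <= 0) {
        return 0; // nothing requested, mirrors the early return in the diff
    }
    // Pretend we can always reclaim what was asked, bounded by what is in use.
    return need_free_mem < used_memory ? need_free_mem : used_memory;
}

int main() {
    int64_t used = 150, limit = 100;
    // Hard-isolation call site: free everything above the limit.
    std::cout << gc_group(used - limit, used) << "\n"; // 50
    // Soft-isolation call site: free only the share the process asked for.
    int64_t weighted_share = 20;
    std::cout << gc_group(weighted_share, used) << "\n"; // 20
}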
@@ -30,7 +30,6 @@
#include "pipeline/task_scheduler.h"
#include "runtime/exec_env.h"
#include "runtime/memory/mem_tracker_limiter.h"
#include "service/backend_options.h"
#include "util/mem_info.h"
#include "util/parse_util.h"
@@ -39,6 +38,7 @@ namespace taskgroup {
 
 const static std::string CPU_SHARE = "cpu_share";
 const static std::string MEMORY_LIMIT = "memory_limit";
+const static std::string ENABLE_MEMORY_OVERCOMMIT = "enable_memory_overcommit";
 
 pipeline::PipelineTask* TaskGroupEntity::take() {
     if (_queue.empty()) {
@@ -81,15 +81,18 @@ TaskGroup::TaskGroup(const TaskGroupInfo& tg_info)
           _name(tg_info.name),
           _cpu_share(tg_info.cpu_share),
           _memory_limit(tg_info.memory_limit),
+          _enable_memory_overcommit(tg_info.enable_memory_overcommit),
           _version(tg_info.version),
           _task_entity(this),
           _mem_tracker_limiter_pool(MEM_TRACKER_GROUP_NUM) {}
 
 std::string TaskGroup::debug_string() const {
     std::shared_lock<std::shared_mutex> rl {_mutex};
-    return fmt::format("TG[id = {}, name = {}, cpu_share = {}, memory_limit = {}, version = {}]",
-                       _id, _name, cpu_share(), PrettyPrinter::print(_memory_limit, TUnit::BYTES),
-                       _version);
+    return fmt::format(
+            "TG[id = {}, name = {}, cpu_share = {}, memory_limit = {}, enable_memory_overcommit = "
+            "{}, version = {}]",
+            _id, _name, cpu_share(), PrettyPrinter::print(_memory_limit, TUnit::BYTES),
+            _enable_memory_overcommit ? "true" : "false", _version);
 }
 
 void TaskGroup::check_and_update(const TaskGroupInfo& tg_info) {
@@ -108,6 +111,7 @@ void TaskGroup::check_and_update(const TaskGroupInfo& tg_info) {
         _name = tg_info.name;
         _version = tg_info.version;
         _memory_limit = tg_info.memory_limit;
+        _enable_memory_overcommit = tg_info.enable_memory_overcommit;
     if (_cpu_share != tg_info.cpu_share) {
         ExecEnv::GetInstance()->pipeline_task_group_scheduler()->update_tg_cpu_share(
                 tg_info, shared_from_this());
@@ -119,6 +123,17 @@ void TaskGroup::update_cpu_share_unlock(const TaskGroupInfo& tg_info) {
     _cpu_share = tg_info.cpu_share;
 }
 
+int64_t TaskGroup::memory_used() {
+    int64_t used_memory = 0;
+    for (auto& mem_tracker_group : _mem_tracker_limiter_pool) {
+        std::lock_guard<std::mutex> l(mem_tracker_group.group_lock);
+        for (const auto& tracker : mem_tracker_group.trackers) {
+            used_memory += tracker->consumption();
+        }
+    }
+    return used_memory;
+}
+
 void TaskGroup::add_mem_tracker_limiter(std::shared_ptr<MemTrackerLimiter> mem_tracker_ptr) {
     auto group_num = mem_tracker_ptr->group_num();
     std::lock_guard<std::mutex> l(_mem_tracker_limiter_pool[group_num].group_lock);
@@ -131,16 +146,14 @@ void TaskGroup::remove_mem_tracker_limiter(std::shared_ptr<MemTrackerLimiter> me
     _mem_tracker_limiter_pool[group_num].trackers.erase(mem_tracker_ptr);
 }
 
-int64_t TaskGroup::memory_limit_gc() {
-    std::string name;
-    int64_t memory_limit;
-    {
-        std::shared_lock<std::shared_mutex> rl {_mutex};
-        name = _name;
-        memory_limit = _memory_limit;
-    }
-    return MemTrackerLimiter::tg_memory_limit_gc(_id, name, memory_limit,
-                                                 _mem_tracker_limiter_pool);
+void TaskGroup::task_group_info(TaskGroupInfo* tg_info) const {
+    std::shared_lock<std::shared_mutex> r_lock(_mutex);
+    tg_info->id = _id;
+    tg_info->name = _name;
+    tg_info->cpu_share = _cpu_share;
+    tg_info->memory_limit = _memory_limit;
+    tg_info->enable_memory_overcommit = _enable_memory_overcommit;
+    tg_info->version = _version;
 }
 
 Status TaskGroupInfo::parse_group_info(const TPipelineResourceGroup& resource_group,
@@ -174,6 +187,12 @@ Status TaskGroupInfo::parse_group_info(const TPipelineResourceGroup& resource_gr
         return Status::InternalError(ss.str());
     }
     task_group_info->memory_limit = mem_limit;
+
+    auto enable_memory_overcommit_iter = resource_group.properties.find(ENABLE_MEMORY_OVERCOMMIT);
+    task_group_info->enable_memory_overcommit =
+            enable_memory_overcommit_iter != resource_group.properties.end() &&
+            enable_memory_overcommit_iter->second ==
+                    "true" /* fe guarantees it is 'true' or 'false' */;
     return Status::OK();
 }
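A small self-contained sketch of the parsing rule added above (using std::map as a stand-in for the Thrift properties map): the flag defaults to false when the property is absent and is true only for the exact string "true", which the FE has already validated.

#include <iostream>
#include <map>
#include <string>

// Illustrative only: mirrors the find-and-compare logic in parse_group_info.
bool parse_enable_memory_overcommit(const std::map<std::string, std::string>& props) {
    auto it = props.find("enable_memory_overcommit");
    return it != props.end() && it->second == "true";
}

int main() {
    std::cout << std::boolalpha;
    std::cout << parse_enable_memory_overcommit({}) << "\n";                                     // false
    std::cout << parse_enable_memory_overcommit({{"enable_memory_overcommit", "true"}}) << "\n"; // true
}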
@@ -87,6 +87,18 @@ public:
 
     uint64_t id() const { return _id; }
 
+    bool enable_memory_overcommit() const {
+        std::shared_lock<std::shared_mutex> r_lock(_mutex);
+        return _enable_memory_overcommit;
+    };
+
+    bool memory_limit() const {
+        std::shared_lock<std::shared_mutex> r_lock(_mutex);
+        return _memory_limit;
+    };
+
+    int64_t memory_used();
+
     std::string debug_string() const;
 
     void check_and_update(const TaskGroupInfo& tg_info);
@@ -97,7 +109,11 @@ public:
 
     void remove_mem_tracker_limiter(std::shared_ptr<MemTrackerLimiter> mem_tracker_ptr);
 
-    int64_t memory_limit_gc();
+    void task_group_info(TaskGroupInfo* tg_info) const;
+
+    std::vector<TgTrackerLimiterGroup>& mem_tracker_limiter_pool() {
+        return _mem_tracker_limiter_pool;
+    }
 
 private:
     mutable std::shared_mutex _mutex; // lock _name, _version, _cpu_share, _memory_limit
@@ -105,6 +121,7 @@ private:
     std::string _name;
     std::atomic<uint64_t> _cpu_share;
     int64_t _memory_limit; // bytes
+    bool _enable_memory_overcommit;
     int64_t _version;
     TaskGroupEntity _task_entity;
@@ -117,8 +134,9 @@ struct TaskGroupInfo {
     uint64_t id;
     std::string name;
     uint64_t cpu_share;
-    int64_t version;
     int64_t memory_limit;
+    bool enable_memory_overcommit;
+    int64_t version;
 
     static Status parse_group_info(const TPipelineResourceGroup& resource_group,
                                    TaskGroupInfo* task_group_info);
@@ -54,20 +54,14 @@ TaskGroupPtr TaskGroupManager::get_or_create_task_group(const TaskGroupInfo& tas
     return new_task_group;
 }
 
-int64_t TaskGroupManager::memory_limit_gc() {
-    int64_t total_free_memory = 0;
-    std::vector<TaskGroupPtr> task_groups;
-    {
-        std::shared_lock<std::shared_mutex> r_lock(_group_mutex);
-        task_groups.reserve(_task_groups.size());
-        for (const auto& [id, task_group] : _task_groups) {
-            task_groups.push_back(task_group);
+void TaskGroupManager::get_resource_groups(const std::function<bool(const TaskGroupPtr& ptr)>& pred,
+                                           std::vector<TaskGroupPtr>* task_groups) {
+    std::shared_lock<std::shared_mutex> r_lock(_group_mutex);
+    for (const auto& [id, task_group] : _task_groups) {
+        if (pred(task_group)) {
+            task_groups->push_back(task_group);
         }
     }
-    for (const auto& task_group : task_groups) {
-        total_free_memory += task_group->memory_limit_gc();
-    }
-    return total_free_memory;
 }
 
 } // namespace doris::taskgroup
@@ -33,7 +33,8 @@ public:
 
     TaskGroupPtr get_or_create_task_group(const TaskGroupInfo& task_group_info);
 
-    int64_t memory_limit_gc();
+    void get_resource_groups(const std::function<bool(const TaskGroupPtr& ptr)>& pred,
+                             std::vector<TaskGroupPtr>* task_groups);
 
 private:
     std::shared_mutex _group_mutex;
@@ -43,6 +43,8 @@
 #include "olap/segment_loader.h"
 #include "runtime/memory/chunk_allocator.h"
 #include "runtime/memory/mem_tracker_limiter.h"
+#include "runtime/task_group/task_group.h"
+#include "runtime/task_group/task_group_manager.h"
 #include "util/cgroup_util.h"
 #include "util/defer_op.h"
 #include "util/parse_util.h"
@@ -132,7 +134,8 @@ void MemInfo::process_cache_gc(int64_t& freed_mem) {
 }
 
 // step1: free all cache
-// step2: free top overcommit query, if enable query memroy overcommit
+// step2: free resource groups memory that enable overcommit
+// step3: free global top overcommit query, if enable query memroy overcommit
 // TODO Now, the meaning is different from java minor gc + full gc, more like small gc + large gc.
 bool MemInfo::process_minor_gc() {
     MonotonicStopWatch watch;
@@ -154,6 +157,11 @@ bool MemInfo::process_minor_gc() {
     // TODO add freed_mem
     SegmentLoader::instance()->prune();
 
+    freed_mem += tg_soft_memory_limit_gc(_s_process_minor_gc_size - freed_mem);
+    if (freed_mem > _s_process_minor_gc_size) {
+        return true;
+    }
+
     VLOG_NOTICE << MemTrackerLimiter::type_detail_usage(
             "Before free top memory overcommit query in Minor GC", MemTrackerLimiter::Type::QUERY);
     if (config::enable_query_memroy_overcommit) {
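The block added above follows the staged-GC pattern of process_minor_gc and process_full_gc: each stage is handed the amount still outstanding, adds what it freed to freed_mem, and the function returns early once the target is exceeded, so the new resource-group stage can spare the later, more expensive query-cancellation stages. A generic sketch of that pattern (toy stages, not Doris code):

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Runs stages in order; each stage receives how much memory is still needed and
// returns how much it actually freed. Stops as soon as the target is exceeded.
bool staged_gc(int64_t target, const std::vector<std::function<int64_t(int64_t)>>& stages) {
    int64_t freed = 0;
    for (const auto& stage : stages) {
        freed += stage(target - freed);
        if (freed > target) {
            return true; // enough reclaimed, remaining (more expensive) stages are skipped
        }
    }
    return false;
}

int main() {
    std::vector<std::function<int64_t(int64_t)>> stages = {
            [](int64_t) { return 60; },            // e.g. cache pruning frees a fixed amount
            [](int64_t need) { return need + 1; }, // e.g. query cancellation frees a bit more than asked
    };
    std::cout << std::boolalpha << staged_gc(100, stages) << "\n"; // true
}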
@@ -167,9 +175,10 @@ bool MemInfo::process_minor_gc() {
 }
 
 // step1: free all cache
-// step2: free top memory query
-// step3: free top overcommit load, load retries are more expensive, So cancel at the end.
-// step4: free top memory load
+// step2: free resource groups memory that enable overcommit
+// step3: free global top memory query
+// step4: free top overcommit load, load retries are more expensive, So cancel at the end.
+// step5: free top memory load
 bool MemInfo::process_full_gc() {
     MonotonicStopWatch watch;
     watch.start();
@@ -197,6 +206,11 @@ bool MemInfo::process_full_gc() {
         }
     }
 
+    freed_mem += tg_soft_memory_limit_gc(_s_process_full_gc_size - freed_mem);
+    if (freed_mem > _s_process_full_gc_size) {
+        return true;
+    }
+
     VLOG_NOTICE << MemTrackerLimiter::type_detail_usage("Before free top memory query in Full GC",
                                                         MemTrackerLimiter::Type::QUERY);
     freed_mem += MemTrackerLimiter::free_top_memory_query(_s_process_full_gc_size - freed_mem,
@@ -225,6 +239,68 @@ bool MemInfo::process_full_gc() {
     return false;
 }
 
+int64_t MemInfo::tg_hard_memory_limit_gc() {
+    std::vector<taskgroup::TaskGroupPtr> task_groups;
+    taskgroup::TaskGroupManager::instance()->get_resource_groups(
+            [](const taskgroup::TaskGroupPtr& task_group) {
+                return !task_group->enable_memory_overcommit();
+            },
+            &task_groups);
+
+    int64_t total_free_memory = 0;
+    for (const auto& task_group : task_groups) {
+        taskgroup::TaskGroupInfo tg_info;
+        task_group->task_group_info(&tg_info);
+        auto used = task_group->memory_used();
+        total_free_memory += MemTrackerLimiter::tg_memory_limit_gc(
+                used - tg_info.memory_limit, used, tg_info.id, tg_info.name, tg_info.memory_limit,
+                task_group->mem_tracker_limiter_pool());
+    }
+    return total_free_memory;
+}
+
+int64_t MemInfo::tg_soft_memory_limit_gc(int64_t request_free_memory) {
+    std::vector<taskgroup::TaskGroupPtr> task_groups;
+    taskgroup::TaskGroupManager::instance()->get_resource_groups(
+            [](const taskgroup::TaskGroupPtr& task_group) {
+                return task_group->enable_memory_overcommit();
+            },
+            &task_groups);
+
+    int64_t total_exceeded_memory = 0;
+    std::vector<int64_t> used_memorys;
+    std::vector<int64_t> exceeded_memorys;
+    for (const auto& task_group : task_groups) {
+        auto used_memory = task_group->memory_used();
+        auto exceeded = used_memory - task_group->memory_limit();
+        auto exceeded_memory = exceeded > 0 ? exceeded : 0;
+        total_exceeded_memory += exceeded_memory;
+        used_memorys.emplace_back(used_memory);
+        exceeded_memorys.emplace_back(exceeded_memory);
+    }
+
+    int64_t total_free_memory = 0;
+    bool gc_all_exceeded = request_free_memory >= total_exceeded_memory;
+    for (int i = 0; i < task_groups.size(); ++i) {
+        if (exceeded_memorys[i] == 0) {
+            continue;
+        }
+
+        // todo: GC according to resource group priority
+        int64_t tg_need_free_memory =
+                gc_all_exceeded ? exceeded_memorys[i]
+                                : static_cast<double>(exceeded_memorys[i]) / total_exceeded_memory *
+                                          request_free_memory /* exceeded memory as a weight */;
+        auto task_group = task_groups[i];
+        taskgroup::TaskGroupInfo tg_info;
+        task_group->task_group_info(&tg_info);
+        total_free_memory += MemTrackerLimiter::tg_memory_limit_gc(
+                tg_need_free_memory, used_memorys[i], tg_info.id, tg_info.name,
+                tg_info.memory_limit, task_group->mem_tracker_limiter_pool());
+    }
+    return total_free_memory;
+}
+
 #ifndef __APPLE__
 void MemInfo::refresh_proc_meminfo() {
     std::ifstream meminfo("/proc/meminfo", std::ios::in);
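A worked example of the weighting in tg_soft_memory_limit_gc above (standalone code, values chosen to divide evenly): when the process requests less than the total overage, each overcommit group is asked to free the request scaled by its own overage.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    int64_t request_free_memory = 2LL << 30;                // process asks for 2 GiB back
    std::vector<int64_t> exceeded = {1LL << 30, 3LL << 30}; // groups exceed their limits by 1 GiB / 3 GiB
    int64_t total_exceeded = 0;
    for (int64_t e : exceeded) total_exceeded += e;         // 4 GiB in total
    bool gc_all_exceeded = request_free_memory >= total_exceeded; // false: distribute by weight
    for (size_t i = 0; i < exceeded.size(); ++i) {
        int64_t need_free = gc_all_exceeded
                ? exceeded[i]
                : static_cast<int64_t>(static_cast<double>(exceeded[i]) / total_exceeded *
                                       request_free_memory);
        std::cout << "group " << i << " frees " << (need_free >> 20) << " MiB\n"; // 512, then 1536
    }
}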
@@ -123,6 +123,10 @@ public:
     static bool process_minor_gc();
     static bool process_full_gc();
 
+    static int64_t tg_hard_memory_limit_gc();
+
+    static int64_t tg_soft_memory_limit_gc(int64_t request_free_memory);
+
     // It is only used after the memory limit is exceeded. When multiple threads are waiting for the available memory of the process,
     // avoid multiple threads starting at the same time and causing OOM.
     static std::atomic<int64_t> refresh_interval_memory_growth;
@@ -46,11 +46,13 @@ public class ResourceGroup implements Writable {
 
     public static final String MEMORY_LIMIT = "memory_limit";
 
+    public static final String ENABLE_MEMORY_OVERCOMMIT = "enable_memory_overcommit";
+
     private static final ImmutableSet<String> REQUIRED_PROPERTIES_NAME = new ImmutableSet.Builder<String>().add(
             CPU_SHARE).add(MEMORY_LIMIT).build();
 
     private static final ImmutableSet<String> ALL_PROPERTIES_NAME = new ImmutableSet.Builder<String>().add(
-            CPU_SHARE).add(MEMORY_LIMIT).build();
+            CPU_SHARE).add(MEMORY_LIMIT).add(ENABLE_MEMORY_OVERCOMMIT).build();
 
     @SerializedName(value = "id")
     private long id;
@@ -78,6 +80,9 @@ public class ResourceGroup implements Writable {
         this.version = version;
         String memoryLimitString = properties.get(MEMORY_LIMIT);
         this.memoryLimitPercent = Double.parseDouble(memoryLimitString.substring(0, memoryLimitString.length() - 1));
+        if (properties.containsKey(ENABLE_MEMORY_OVERCOMMIT)) {
+            properties.put(ENABLE_MEMORY_OVERCOMMIT, properties.get(ENABLE_MEMORY_OVERCOMMIT).toLowerCase());
+        }
     }
 
     public static ResourceGroup create(String name, Map<String, String> properties) throws DdlException {
@@ -129,6 +134,13 @@ public class ResourceGroup implements Writable {
             LOG.debug(memLimitErr, e);
             throw new DdlException(memLimitErr);
         }
+
+        if (properties.containsKey(ENABLE_MEMORY_OVERCOMMIT)) {
+            String value = properties.get(ENABLE_MEMORY_OVERCOMMIT).toLowerCase();
+            if (!("true".equals(value) || "false".equals(value))) {
+                throw new DdlException("The value of '" + ENABLE_MEMORY_OVERCOMMIT + "' must be true or false.");
+            }
+        }
     }
 
     public long getId() {
@@ -123,7 +123,8 @@ public class ResourceGroupMgr implements Writable, GsonPostProcessable {
         }
         Map<String, String> properties = Maps.newHashMap();
         properties.put(ResourceGroup.CPU_SHARE, "10");
-        properties.put(ResourceGroup.MEMORY_LIMIT, "100%");
+        properties.put(ResourceGroup.MEMORY_LIMIT, "30%");
+        properties.put(ResourceGroup.ENABLE_MEMORY_OVERCOMMIT, "true");
         defaultResourceGroup = ResourceGroup.create(DEFAULT_GROUP_NAME, properties);
         nameToResourceGroup.put(DEFAULT_GROUP_NAME, defaultResourceGroup);
         idToResourceGroup.put(defaultResourceGroup.getId(), defaultResourceGroup);