From fb925bdd08c6b733ea9e36e85f63849444e643b4 Mon Sep 17 00:00:00 2001 From: xy720 <22125576+xy720@users.noreply.github.com> Date: Fri, 15 Dec 2023 19:17:18 +0800 Subject: [PATCH] [Bug](memory) Fix exception-unsafe in aggregation node (#28483) The alloc function may throw a std::bad_alloc exception when the process memory exceeds the limit. be.INFO: W1214 09:14:17.434849 771103 mem_tracker_limiter.cpp:204] Memory limit exceeded:, process memory used 20.41 GB exceed limit 18.76 GB or sys mem available 9.04 GB less than low water mark 1.60 GB, failed alloc size 1.86 MB>, executing msg:>. backend xx.x.x.xxx process memory used 20.41 GB, limit 18.76 GB. If query tracker exceed, set exec_mem_limit=8G to change limit, details see be.INFO. Process Memory Summary: OS physical memory 31.26 GB. Process memory usage 20.41 GB, limit 18.76 GB, soft limit 16.88 GB. Sys available memory 9.04 GB, low water mark 1.60 GB, warning water mark 3.20 GB. Refresh interval memory growth 0 B Alloc Stacktrace: @ 0x555cd858bee9 doris::MemTrackerLimiter::print_log_usage() @ 0x555cd859a384 doris::ThreadMemTrackerMgr::exceeded() @ 0x555cd85a0ac4 malloc @ 0x555cd8fcf368 Allocator<>::alloc() @ 0x555cd8fdbdaf doris::vectorized::Arena::add_chunk() @ 0x555cd96dc0ab doris::vectorized::AggregateDataContainer::_expand() @ 0x555cd96aded8 (unknown) @ 0x555cd969fa2c doris::vectorized::AggregationNode::_pre_agg_with_serialized_key() @ 0x555cd96d1d61 std::_Function_handler<>::_M_invoke() @ 0x555cd967ab0b doris::vectorized::AggregationNode::get_next() @ 0x555cd81282a6 doris::ExecNode::get_next_after_projects() @ 0x555cd8452968 doris::PlanFragmentExecutor::get_vectorized_internal() @ 0x555cd845553b doris::PlanFragmentExecutor::open_vectorized_internal() @ 0x555cd8456a9e doris::PlanFragmentExecutor::open() @ 0x555cd842f200 doris::FragmentExecState::execute() @ 0x555cd843280e doris::FragmentMgr::_exec_actual() @ 0x555cd8432d42 
_ZNSt17_Function_handlerIFvvEZN5doris11FragmentMgr18exec_plan_fragmentERKNS1_23TExecPlanFragmentParamsESt8funct ionIFvPNS1_20PlanFragmentExecutorEEEEUlvE_E9_M_invokeERKSt9_Any_data @ 0x555cd86ead05 doris::ThreadPool::dispatch_thread() @ 0x555cd86e015f doris::Thread::supervise_thread() @ 0x7f3321593ea5 start_thread @ 0x7f33218a69fd __clone @ (nil) (unknown) --- be/src/vec/exec/vaggregation_node.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h index fba82aa8c9..30eb8aa9d3 100644 --- a/be/src/vec/exec/vaggregation_node.h +++ b/be/src/vec/exec/vaggregation_node.h @@ -326,12 +326,26 @@ public: private: void _expand() { _index_in_sub_container = 0; - _current_keys = _arena_pool.alloc(_size_of_key * SUB_CONTAINER_CAPACITY); - _key_containers.emplace_back(_current_keys); + _current_keys = nullptr; + _current_agg_data = nullptr; + try { + _current_keys = _arena_pool.alloc(_size_of_key * SUB_CONTAINER_CAPACITY); + _key_containers.emplace_back(_current_keys); - _current_agg_data = (AggregateDataPtr)_arena_pool.alloc(_size_of_aggregate_states * - SUB_CONTAINER_CAPACITY); - _value_containers.emplace_back(_current_agg_data); + _current_agg_data = (AggregateDataPtr)_arena_pool.alloc(_size_of_aggregate_states * + SUB_CONTAINER_CAPACITY); + _value_containers.emplace_back(_current_agg_data); + } catch (...) { + if (_current_keys) { + _key_containers.pop_back(); + _current_keys = nullptr; + } + if (_current_agg_data) { + _value_containers.pop_back(); + _current_agg_data = nullptr; + } + throw; + } } static constexpr uint32_t SUB_CONTAINER_CAPACITY = 8192;